diff --git a/0001-backport-LoongArch-patches.patch b/0001-backport-LoongArch-patches.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7328bc0602b18060a1faf2218cc7316caa187778
--- /dev/null
+++ b/0001-backport-LoongArch-patches.patch
@@ -0,0 +1,4939 @@
+diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
+index 174818417..22067d184 100644
+--- a/llvm/docs/ReleaseNotes.rst
++++ b/llvm/docs/ReleaseNotes.rst
+@@ -186,6 +186,8 @@ Changes to the LoongArch Backend
+ * Initial JITLink support is added.
+ (`D141036 <https://reviews.llvm.org/D141036>`_)
+
++* The `lp64s` ABI is supported now and has been tested on Rust bare-metal target.
++
+ Changes to the MIPS Backend
+ ---------------------------
+
+diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+index 67dbd0201..02bce3c71 100644
+--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97)
+ ELF_RELOC(R_LARCH_TLS_GD_HI20, 98)
+ ELF_RELOC(R_LARCH_32_PCREL, 99)
+ ELF_RELOC(R_LARCH_RELAX, 100)
++
++// Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the
++// v2.10 LoongArch ABI specs.
++//
++// Spec addition: https://github.com/loongson/la-abi-specs/pull/1
++// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138
++ELF_RELOC(R_LARCH_DELETE, 101)
++ELF_RELOC(R_LARCH_ALIGN, 102)
++ELF_RELOC(R_LARCH_PCREL20_S2, 103)
++ELF_RELOC(R_LARCH_CFA, 104)
++ELF_RELOC(R_LARCH_ADD6, 105)
++ELF_RELOC(R_LARCH_SUB6, 106)
++ELF_RELOC(R_LARCH_ADD_ULEB128, 107)
++ELF_RELOC(R_LARCH_SUB_ULEB128, 108)
++ELF_RELOC(R_LARCH_64_PCREL, 109)
+diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+index 4ebdcc012..b20d12495 100644
+--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+@@ -2,8 +2,6 @@
+ #define LOONGARCH_FEATURE(NAME, KIND)
+ #endif
+
+-LOONGARCH_FEATURE("invalid", FK_INVALID)
+-LOONGARCH_FEATURE("none", FK_NONE)
+ LOONGARCH_FEATURE("+64bit", FK_64BIT)
+ LOONGARCH_FEATURE("+f", FK_FP32)
+ LOONGARCH_FEATURE("+d", FK_FP64)
+@@ -11,6 +9,7 @@ LOONGARCH_FEATURE("+lsx", FK_LSX)
+ LOONGARCH_FEATURE("+lasx", FK_LASX)
+ LOONGARCH_FEATURE("+lbt", FK_LBT)
+ LOONGARCH_FEATURE("+lvz", FK_LVZ)
++LOONGARCH_FEATURE("+ual", FK_UAL)
+
+ #undef LOONGARCH_FEATURE
+
+@@ -18,8 +17,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ)
+ #define LOONGARCH_ARCH(NAME, KIND, FEATURES)
+ #endif
+
+-LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID)
+-LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64)
+-LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX)
++LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
++LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
+
+ #undef LOONGARCH_ARCH
+diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+index 53f9073e4..028844187 100644
+--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+@@ -23,9 +23,6 @@ class StringRef;
+ namespace LoongArch {
+
+ enum FeatureKind : uint32_t {
+- FK_INVALID = 0,
+- FK_NONE = 1,
+-
+ // 64-bit ISA is available.
+ FK_64BIT = 1 << 1,
+
+@@ -46,6 +43,9 @@ enum FeatureKind : uint32_t {
+
+ // Loongson Virtualization Extension is available.
+ FK_LVZ = 1 << 7,
++
++ // Allow memory accesses to be unaligned.
++ FK_UAL = 1 << 8,
+ };
+
+ struct FeatureInfo {
+@@ -64,11 +64,14 @@ struct ArchInfo {
+ uint32_t Features;
+ };
+
+-ArchKind parseArch(StringRef Arch);
++bool isValidArchName(StringRef Arch);
+ bool getArchFeatures(StringRef Arch, std::vector<StringRef> &Features);
++bool isValidCPUName(StringRef TuneCPU);
++void fillValidCPUList(SmallVectorImpl<StringRef> &Values);
++StringRef getDefaultArch(bool Is64Bit);
+
+ } // namespace LoongArch
+
+ } // namespace llvm
+
+-#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H
++#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H
+diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
+index 59513fa2f..5ddb1d314 100644
+--- a/llvm/include/llvm/TargetParser/Triple.h
++++ b/llvm/include/llvm/TargetParser/Triple.h
+@@ -846,10 +846,14 @@ public:
+ : PointerWidth == 64;
+ }
+
++ /// Tests whether the target is 32-bit LoongArch.
++ bool isLoongArch32() const { return getArch() == Triple::loongarch32; }
++
++ /// Tests whether the target is 64-bit LoongArch.
++ bool isLoongArch64() const { return getArch() == Triple::loongarch64; }
++
+ /// Tests whether the target is LoongArch (32- and 64-bit).
+- bool isLoongArch() const {
+- return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64;
+- }
++ bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); }
+
+ /// Tests whether the target is MIPS 32-bit (little and big endian).
+ bool isMIPS32() const {
+diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+index 9a3609bc1..dc5c443ea 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+@@ -10247,8 +10247,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+- if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+- isConstantIntBuildVectorOrConstantInt(N1)) {
++ if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+index bc84988e3..e06dea9d5 100644
+--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
++++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+@@ -802,7 +802,8 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
+
+ if (S.JTMB->getTargetTriple().isOSBinFormatELF() &&
+ (S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64 ||
+- S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le))
++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le ||
++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::loongarch64))
+ Layer->setAutoClaimResponsibilityForObjectSymbols(true);
+
+ // FIXME: Explicit conversion to std::unique_ptr<ObjectLayer> added to silence
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+index a9aaff424..b154ea287 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+@@ -987,6 +987,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
+ // and stubs for branches Thumb - ARM and ARM - Thumb.
+ writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4]
+ return Addr + 4;
++ } else if (Arch == Triple::loongarch64) {
++ // lu12i.w $t0, %abs_hi20(addr)
++ // ori $t0, $t0, %abs_lo12(addr)
++ // lu32i.d $t0, %abs64_lo20(addr)
++ // lu52i.d $t0, $t0, %abs64_hi12(addr)
++ // jr $t0
++ writeBytesUnaligned(0x1400000c, Addr, 4);
++ writeBytesUnaligned(0x0380018c, Addr + 4, 4);
++ writeBytesUnaligned(0x1600000c, Addr + 8, 4);
++ writeBytesUnaligned(0x0300018c, Addr + 12, 4);
++ writeBytesUnaligned(0x4c000180, Addr + 16, 4);
++ return Addr;
+ } else if (IsMipsO32ABI || IsMipsN32ABI) {
+ // 0: 3c190000 lui t9,%hi(addr).
+ // 4: 27390000 addiu t9,t9,%lo(addr).
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+index 2fe49fefa..f85452bee 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+@@ -641,6 +641,102 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
+ }
+ }
+
++// Returns extract bits Val[Hi:Lo].
++static inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) {
++ return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo;
++}
++
++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section,
++ uint64_t Offset,
++ uint64_t Value, uint32_t Type,
++ int64_t Addend) {
++ uint32_t *TargetPtr =
++ reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
++
++ LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x"
++ << format("%llx", Section.getAddressWithOffset(Offset))
++ << " FinalAddress: 0x" << format("%llx", FinalAddress)
++ << " Value: 0x" << format("%llx", Value) << " Type: 0x"
++ << format("%x", Type) << " Addend: 0x"
++ << format("%llx", Addend) << "\n");
++
++ switch (Type) {
++ default:
++ report_fatal_error("Relocation type not implemented yet!");
++ break;
++ case ELF::R_LARCH_32:
++ *(support::little32_t *)TargetPtr = static_cast<uint32_t>(Value + Addend);
++ break;
++ case ELF::R_LARCH_64:
++ *(support::little64_t *)TargetPtr = Value + Addend;
++ break;
++ case ELF::R_LARCH_32_PCREL:
++ *(support::little32_t *)TargetPtr =
++ static_cast<uint32_t>(Value - FinalAddress + Addend);
++ break;
++ case ELF::R_LARCH_B26: {
++ uint64_t BranchImm = Value - FinalAddress + Addend;
++ assert(isInt<28>(BranchImm));
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm = static_cast<uint32_t>(BranchImm >> 2);
++ uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10;
++ uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16);
++ *(support::little32_t *)TargetPtr = RawInstr | Imm15_0 | Imm25_16;
++ break;
++ }
++ case ELF::R_LARCH_GOT_PC_HI20:
++ case ELF::R_LARCH_PCALA_HI20: {
++ uint64_t Target = Value + Addend;
++ uint64_t TargetPage =
++ (Target + (Target & 0x800)) & ~static_cast<uint64_t>(0xfff);
++ uint64_t PCPage = FinalAddress & ~static_cast<uint64_t>(0xfff);
++ int64_t PageDelta = TargetPage - PCPage;
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5;
++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12;
++ break;
++ }
++ case ELF::R_LARCH_GOT_PC_LO12:
++ case ELF::R_LARCH_PCALA_LO12: {
++ // TODO: code-model=medium
++ uint64_t TargetOffset = (Value + Addend) & 0xfff;
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm11_0 = TargetOffset << 10;
++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0;
++ 
break; ++ } ++ case ELF::R_LARCH_ABS_HI20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_ABS_LO12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_LO20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm51_32 = extractBits(Target >> 32, /*Hi=*/19, /*Lo=*/0) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm51_32; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_HI12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm63_52 = extractBits(Target >> 32, /*Hi=*/31, /*Lo=*/20) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm63_52; ++ break; ++ } ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1057,6 +1153,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1209,6 +1308,81 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, + } + } + ++bool RuntimeDyldELF::resolveLoongArch64ShortBranch( ++ unsigned SectionID, relocation_iterator RelI, ++ const RelocationValueRef &Value) { ++ uint64_t Address; ++ if (Value.SymbolName) { ++ auto Loc = GlobalSymbolTable.find(Value.SymbolName); ++ // Don't create direct branch for external symbols. ++ if (Loc == GlobalSymbolTable.end()) ++ return false; ++ const auto &SymInfo = Loc->second; ++ Address = ++ uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( ++ SymInfo.getOffset())); ++ } else { ++ Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); ++ } ++ uint64_t Offset = RelI->getOffset(); ++ uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); ++ if (!isInt<28>(Address + Value.Addend - SourceAddress)) ++ return false; ++ resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), ++ Value.Addend); ++ return true; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, ++ StubMap &Stubs) { ++ LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); ++ SectionEntry &Section = Sections[SectionID]; ++ uint64_t Offset = RelI->getOffset(); ++ unsigned RelType = RelI->getType(); ++ // Look for an existing stub. ++ StubMap::const_iterator i = Stubs.find(Value); ++ if (i != Stubs.end()) { ++ resolveRelocation(Section, Offset, ++ (uint64_t)Section.getAddressWithOffset(i->second), ++ RelType, 0); ++ LLVM_DEBUG(dbgs() << " Stub function found\n"); ++ } else if (!resolveLoongArch64ShortBranch(SectionID, RelI, Value)) { ++ // Create a new stub function. 
++ LLVM_DEBUG(dbgs() << " Create a new stub function\n");
++ Stubs[Value] = Section.getStubOffset();
++ uint8_t *StubTargetAddr = createStubFunction(
++ Section.getAddressWithOffset(Section.getStubOffset()));
++ RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(),
++ ELF::R_LARCH_ABS_HI20, Value.Addend);
++ RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4,
++ ELF::R_LARCH_ABS_LO12, Value.Addend);
++ RelocationEntry LU32I_D(SectionID,
++ StubTargetAddr - Section.getAddress() + 8,
++ ELF::R_LARCH_ABS64_LO20, Value.Addend);
++ RelocationEntry LU52I_D(SectionID,
++ StubTargetAddr - Section.getAddress() + 12,
++ ELF::R_LARCH_ABS64_HI12, Value.Addend);
++ if (Value.SymbolName) {
++ addRelocationForSymbol(LU12I_W, Value.SymbolName);
++ addRelocationForSymbol(ORI, Value.SymbolName);
++ addRelocationForSymbol(LU32I_D, Value.SymbolName);
++ addRelocationForSymbol(LU52I_D, Value.SymbolName);
++ } else {
++ addRelocationForSection(LU12I_W, Value.SectionID);
++ addRelocationForSection(ORI, Value.SectionID);
++ addRelocationForSection(LU32I_D, Value.SectionID);
++ addRelocationForSection(LU52I_D, Value.SectionID);
++ }
++ resolveRelocation(Section, Offset,
++ reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
++ Section.getStubOffset())),
++ RelType, 0);
++ Section.advanceStubOffset(getMaxStubSize());
++ }
++}
++
+ Expected<relocation_iterator>
+ RuntimeDyldELF::processRelocationRef(
+ unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
+@@ -1369,6 +1543,25 @@ RuntimeDyldELF::processRelocationRef(
+ }
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
+ }
++ } else if (Arch == Triple::loongarch64) {
++ if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) {
++ resolveLoongArch64Branch(SectionID, Value, RelI, Stubs);
++ } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 ||
++ RelType == ELF::R_LARCH_GOT_PC_LO12) {
++ // FIXME: This will create redundant got entry.
++ uint64_t GOTOffset = allocateGOTEntries(1);
++ // Create relocation for newly created GOT entry.
++ RelocationEntry RE = ++ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_LARCH_64); ++ if (Value.SymbolName) ++ addRelocationForSymbol(RE, Value.SymbolName); ++ else ++ addRelocationForSection(RE, Value.SectionID); ++ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ++ RelType); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2214,6 +2407,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +@@ -2525,6 +2719,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { + return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || + RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; + ++ if (Arch == Triple::loongarch64) ++ return RelTy == ELF::R_LARCH_GOT_PC_HI20 || ++ RelTy == ELF::R_LARCH_GOT_PC_LO12; ++ + if (Arch == Triple::x86_64) + return RelTy == ELF::R_X86_64_GOTPCREL || + RelTy == ELF::R_X86_64_GOTPCRELX || +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index dfdd98cb3..2c930219c 100644 +--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ ++ bool resolveLoongArch64ShortBranch(unsigned SectionID, ++ relocation_iterator RelI, ++ const RelocationValueRef &Value); ++ ++ void resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, StubMap &Stubs); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + return 16; + else if (IsMipsN64ABI) + return 32; ++ if (Arch == Triple::loongarch64) ++ return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) + return 44; + else if (Arch == Triple::x86_64) +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 7e5c3563f..3e9e8b251 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -115,6 +115,11 @@ def HasLaLocalWithAbs + AssemblerPredicate<(all_of LaLocalWithAbs), + "Expand la.local as la.abs">; + ++// Unaligned memory access ++def FeatureUAL ++ : SubtargetFeature<"ual", "HasUAL", "true", ++ "Allow memory accesses to be unaligned">; ++ + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... 
+ //===----------------------------------------------------------------------===// +@@ -128,13 +133,19 @@ include "LoongArchInstrInfo.td" + //===----------------------------------------------------------------------===// + + def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; +-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; ++def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; ++ ++// Generic 64-bit processor with double-precision floating-point support. ++def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, ++ FeatureUAL, ++ FeatureBasicD]>; + + // Support generic for compatibility with other targets. The triple will be used + // to change to the appropriate la32/la64 version. + def : ProcessorModel<"generic", NoSchedModel, []>; + + def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, ++ FeatureUAL, + FeatureExtLASX, + FeatureExtLVZ, + FeatureExtLBT]>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +index 6d9cb5e17..04fdd41d6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +@@ -35,6 +35,12 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + if (emitPseudoExpansionLowering(*OutStreamer, MI)) + return; + ++ switch (MI->getOpcode()) { ++ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: ++ LowerPATCHABLE_FUNCTION_ENTER(*MI); ++ return; ++ } ++ + MCInst TmpInst; + if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) + EmitToStreamer(*OutStreamer, TmpInst); +@@ -110,6 +116,22 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + return false; + } + ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( ++ const MachineInstr &MI) { ++ const Function &F = MF->getFunction(); ++ if (F.hasFnAttribute("patchable-function-entry")) { ++ unsigned Num; ++ if (F.getFnAttribute("patchable-function-entry") ++ .getValueAsString() ++ .getAsInteger(10, Num)) ++ return; ++ emitNops(Num); ++ return; ++ } ++ ++ // TODO: Emit sled here once we get support for XRay. ++} ++ + bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AsmPrinter::runOnMachineFunction(MF); + return true; +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +index 23e293547..c8bf657f8 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +@@ -41,6 +41,8 @@ public: + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + ++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index a4a82bdef..19baa4b59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -597,13 +597,12 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + } + } + +-// Helper function that emits error message for intrinsics with chain. ++// Helper function that emits error message for intrinsics with chain and return ++// merge values of a UNDEF and the chain. 
+ static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
+ StringRef ErrorMsg,
+ SelectionDAG &DAG) {
+-
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
+- ErrorMsg);
++ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
+ return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
+ SDLoc(Op));
+ }
+@@ -613,9 +612,11 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- SDValue Op0 = Op.getOperand(0);
+- std::string Name = Op->getOperationName(0);
+- const StringRef ErrorMsgOOR = "out of range";
++ EVT VT = Op.getValueType();
++ SDValue Chain = Op.getOperand(0);
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
+
+ switch (Op.getConstantOperandVal(1)) {
+ default:
+@@ -627,115 +628,76 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ case Intrinsic::loongarch_crcc_w_b_w:
+ case Intrinsic::loongarch_crcc_w_h_w:
+ case Intrinsic::loongarch_crcc_w_w_w:
+- case Intrinsic::loongarch_crcc_w_d_w: {
+- std::string Name = Op->getOperationName(0);
+- DAG.getContext()->emitError(Name + " requires target: loongarch64");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
++ case Intrinsic::loongarch_crcc_w_d_w:
++ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
+ case Intrinsic::loongarch_csrrd_w:
+ case Intrinsic::loongarch_csrrd_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_csrwr_w:
+ case Intrinsic::loongarch_csrwr_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2),
++ DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_csrxchg_w:
+ case Intrinsic::loongarch_csrxchg_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
+- Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2), Op.getOperand(3),
++ DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_iocsrrd_d: {
+- if (Subtarget.is64Bit())
+- return DAG.getMergeValues(
+- {DAG.getNode(
+- LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
+- Op0},
+- DL);
+- else {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.crc.w.d.w requires target: loongarch64");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
++ return DAG.getNode(
++ LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
+ }
+ #define IOCSRRD_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- return DAG.getMergeValues( \
+- {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \
+- Op0}, \
+- DL); \
++ return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
++ {Chain, Op.getOperand(2)}); \
+ }
+ IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
+ IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
+ IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
+ #undef IOCSRRD_CASE
+ case Intrinsic::loongarch_cpucfg: {
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
+- Op0},
+- DL);
++ return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2)});
+ }
+ case Intrinsic::loongarch_lddir_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<8>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
+- "' out of range");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
+-
+- return Op;
++ return !isUInt<8>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : Op;
+ }
+ case Intrinsic::loongarch_movfcsr2gr: {
+- if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movfcsr2gr expects basic f target feature");
+- return DAG.getMergeValues(
+- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
+- }
++ if (!Subtarget.hasBasicF())
++ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+- if (!isUInt<2>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
+- "' " + ErrorMsgOOR);
+- return DAG.getMergeValues(
+- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
+- }
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op.getOperand(0)},
+- DL);
++ return !isUInt<2>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ }
+ }
+
+ // Helper function that emits error message for intrinsics with void return
+-// value.
++// value and return the chain.
+ static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
+ SelectionDAG &DAG) {
+
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
+- ErrorMsg);
++ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
+ return Op.getOperand(0);
+ }
+
+@@ -743,10 +705,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- SDValue Op0 = Op.getOperand(0);
++ SDValue Chain = Op.getOperand(0);
+ uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
+ SDValue Op2 = Op.getOperand(2);
+- const StringRef ErrorMsgOOR = "out of range";
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqLA32 = "requires loongarch32";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
+
+ switch (IntrinsicEnum) {
+ default:
+@@ -754,122 +719,93 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ return SDValue();
+ case Intrinsic::loongarch_cacop_d:
+ case Intrinsic::loongarch_cacop_w: {
+- if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.cacop.d requires target: loongarch64");
+- return Op.getOperand(0);
+- }
+- if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.cacop.w requires target: loongarch32");
+- return Op.getOperand(0);
+- }
++ if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
++ if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
+ // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
+ unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<5>(Imm1))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+- SDValue Op4 = Op.getOperand(4);
+- int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
+- if (!isInt<12>(Imm2))
++ int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
++ if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
+ return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+ return Op;
+ }
+-
+ case Intrinsic::loongarch_dbar: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_ibar: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_break: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_movgr2fcsr: {
+- if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movgr2fcsr expects basic f target feature");
+- return Op0;
+- }
++ if (!Subtarget.hasBasicF())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<2>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(
+- LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT),
+- DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
++ return !isUInt<2>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT),
++ DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
++ Op.getOperand(3)));
+ }
+ case Intrinsic::loongarch_syscall: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ #define IOCSRWR_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+ SDValue Op3 = Op.getOperand(3); \
+- if (Subtarget.is64Bit()) \
+- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)); \
+- else \
+- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3); \
++ return Subtarget.is64Bit() \
++ ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
++ : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
++ Op3); \
+ }
+ IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
+ IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
+ IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
+ #undef IOCSRWR_CASE
+ case Intrinsic::loongarch_iocsrwr_d: {
+- if (Subtarget.is64Bit())
+- return DAG.getNode(
+- LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
+- else {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.iocsrwr.d requires target: loongarch64");
+- return Op.getOperand(0);
+- }
++ return !Subtarget.is64Bit()
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
++ : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
++ Op2,
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
++ Op.getOperand(3)));
+ }
+ #define ASRT_LE_GT_CASE(NAME) \
+ case Intrinsic::loongarch_##NAME: { \
+- if (!Subtarget.is64Bit()) { \
+- DAG.getContext()->emitError(Op->getOperationName(0) + \
+- " requires target: loongarch64"); \
+- return Op.getOperand(0); \
+- } \
+- return Op; \
++ return !Subtarget.is64Bit() \
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
++ : Op; \
+ }
+ ASRT_LE_GT_CASE(asrtle_d)
+ ASRT_LE_GT_CASE(asrtgt_d)
+ #undef ASRT_LE_GT_CASE
+ case Intrinsic::loongarch_ldpte_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<8>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+- if (!Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(Op->getOperationName(0) +
+- " requires target: loongarch64");
+- return Op.getOperand(0);
+- }
+- return Op;
++ return !Subtarget.is64Bit()
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
++ : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : Op;
+ }
+ }
+ }
+@@ -1022,6 +958,16 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
+ }
+
++// Helper function that emits error message for intrinsics with chain and return
++// a UNDEF and the chain as the results.
++static void emitErrorAndReplaceIntrinsicWithChainResults(
++ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
++ StringRef ErrorMsg) {
++ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
++ Results.push_back(DAG.getUNDEF(N->getValueType(0)));
++ Results.push_back(N->getOperand(0));
++}
++
+ void LoongArchTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+@@ -1142,50 +1088,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+- SDValue Op0 = N->getOperand(0);
+- EVT VT = N->getValueType(0);
+- uint64_t Op1 = N->getConstantOperandVal(1);
++ SDValue Chain = N->getOperand(0);
++ SDValue Op2 = N->getOperand(2);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
++
++ switch (N->getConstantOperandVal(1)) {
++ default:
++ llvm_unreachable("Unexpected Intrinsic.");
++ case Intrinsic::loongarch_movfcsr2gr: {
+ if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movfcsr2gr expects basic f target feature");
+- Results.push_back(DAG.getMergeValues(
+- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
+- Results.push_back(N->getOperand(0));
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgReqF);
+ return;
+ }
+- unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
++ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (!isUInt<2>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
+- "' " + "out of range");
+- Results.push_back(DAG.getMergeValues(
+- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
+- Results.push_back(N->getOperand(0));
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
+ return;
+ }
++ SDValue MOVFCSR2GRResults = DAG.getNode(
++ LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+- DAG.getNode(ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
+- return;
++ DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
++ Results.push_back(MOVFCSR2GRResults.getValue(1));
++ break;
+ }
+- SDValue Op2 = N->getOperand(2);
+- std::string Name = N->getOperationName(0);
+-
+- switch (Op1) {
+- default:
+- llvm_unreachable("Unexpected Intrinsic.");
+ #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back(DAG.getNode( \
+- ISD::TRUNCATE, DL, VT, \
+- DAG.getNode( \
+- LoongArchISD::NODE, DL, MVT::i64, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue NODE = DAG.getNode( \
++ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
++ Results.push_back(NODE.getValue(1)); \
+ break; \
+ }
+ CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
+@@ -1198,12 +1138,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+
+ #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back( \
+- DAG.getNode(ISD::TRUNCATE, DL, VT, \
+- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \
+- N->getOperand(3))))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue NODE = DAG.getNode( \
++ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, Op2, \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
++ Results.push_back(NODE.getValue(1)); \
+ break; \
+ }
+ CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
+@@ -1211,11 +1151,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ #undef CRC_CASE_EXT_UNARYOP
+ #define CSR_CASE(ID) \
+ case Intrinsic::loongarch_##ID: { \
+- if (!Subtarget.is64Bit()) { \
+- DAG.getContext()->emitError(Name + " requires target: loongarch64"); \
+- Results.push_back(DAG.getUNDEF(VT)); \
+- Results.push_back(N->getOperand(0)); \
+- } \
++ if (!Subtarget.is64Bit()) \
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \
++ ErrorMsgReqLA64); \
+ break; \
+ }
+ CSR_CASE(csrrd_d);
+@@ -1226,62 +1164,59 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ case Intrinsic::loongarch_csrrd_w: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
++ SDValue CSRRDResults =
++ DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+- DAG.getNode(ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
++ Results.push_back(CSRRDResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_csrwr_w: {
+ unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ SDValue CSRWRResults =
++ DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
++ DAG.getConstant(Imm, DL, GRLenVT)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
++ Results.push_back(CSRWRResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_csrxchg_w: {
+ unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(
+- LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ SDValue CSRXCHGResults = DAG.getNode(
++ LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
++ DAG.getConstant(Imm, DL, GRLenVT)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
++ Results.push_back(CSRXCHGResults.getValue(1));
+ break;
+ }
+ #define IOCSRRD_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back(DAG.getNode( \
+- ISD::TRUNCATE, DL, N->getValueType(0), \
+- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue IOCSRRDResults = \
++ DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
++ Results.push_back( \
++ DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
++ Results.push_back(IOCSRRDResults.getValue(1)); \
+ break; \
+ }
+ IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
+@@ -1289,20 +1224,19 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
+ #undef IOCSRRD_CASE
+ case Intrinsic::loongarch_cpucfg: {
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
+- Results.push_back(Op0);
++ SDValue CPUCFGResults =
++ DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
++ Results.push_back(CPUCFGResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_lddir_d: {
+ if (!Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(N->getOperationName(0) +
+- " requires target: loongarch64");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(Op0);
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgReqLA64);
++ return;
+ }
+ break;
+ }
+@@ -1764,6 +1698,18 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
+ }
+ }
+
++bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
++ EVT VT, unsigned AddrSpace, Align 
Alignment, MachineMemOperand::Flags Flags, ++ unsigned *Fast) const { ++ if (!Subtarget.hasUAL()) ++ return false; ++ ++ // TODO: set reasonable speed number. ++ if (Fast) ++ *Fast = 1; ++ return true; ++} ++ + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((LoongArchISD::NodeType)Opcode) { + case LoongArchISD::FIRST_NUMBER: +@@ -1907,7 +1853,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + default: + llvm_unreachable("Unexpected ABI"); + case LoongArchABI::ABI_ILP32S: +- case LoongArchABI::ABI_LP64S: + case LoongArchABI::ABI_ILP32F: + case LoongArchABI::ABI_LP64F: + report_fatal_error("Unimplemented ABI"); +@@ -1916,6 +1861,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + case LoongArchABI::ABI_LP64D: + UseGPRForFloat = !IsFixed; + break; ++ case LoongArchABI::ABI_LP64S: ++ break; + } + + // FPR32 and FPR64 alias each other. +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 0ddcda66d..62c83384c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -80,7 +80,22 @@ enum NodeType : unsigned { + CRCC_W_D_W, + + CSRRD, ++ ++ // Write new value to CSR and return old value. ++ // Operand 0: A chain pointer. ++ // Operand 1: The new value to write. ++ // Operand 2: The address of the required CSR. ++ // Result 0: The old value of the CSR. ++ // Result 1: The new chain pointer. + CSRWR, ++ ++ // Similar to CSRWR but with a write mask. ++ // Operand 0: A chain pointer. ++ // Operand 1: The new value to write. ++ // Operand 2: The write mask. ++ // Operand 3: The address of the required CSR. ++ // Result 0: The old value of the CSR. ++ // Result 1: The new chain pointer. + CSRXCHG, + + // IOCSR access operations +@@ -181,6 +196,11 @@ public: + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + ++ bool allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), ++ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, ++ unsigned *Fast = nullptr) const override; ++ + private: + /// Target-specific function used to lower LoongArch calling conventions. 
+ typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index fbbb764b8..3e19f3e2f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -17,6 +17,7 @@ + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "MCTargetDesc/LoongArchMatInt.h" + #include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/MC/MCInstBuilder.h" + + using namespace llvm; + +@@ -28,6 +29,13 @@ LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) + LoongArch::ADJCALLSTACKUP), + STI(STI) {} + ++MCInst LoongArchInstrInfo::getNop() const { ++ return MCInstBuilder(LoongArch::ANDI) ++ .addReg(LoongArch::R0) ++ .addReg(LoongArch::R0) ++ .addImm(0); ++} ++ + void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +index e2b80460f..cf83abf27 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +@@ -27,6 +27,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { + public: + explicit LoongArchInstrInfo(LoongArchSubtarget &STI); + ++ MCInst getNop() const override; ++ + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, + bool KillSrc) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 75b2adc72..f20beee92 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -75,21 +75,21 @@ def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; + def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; + def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>; + def loongarch_crc_w_b_w +- : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_h_w +- : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_w_w +- : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_d_w +- : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_b_w +- : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_h_w +- : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_w_w +- : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_d_w +- : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_bstrins + : SDNode<"LoongArchISD::BSTRINS", 
SDT_LoongArchBStrIns>; + def loongarch_bstrpick +@@ -106,7 +106,8 @@ def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +-def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr>; ++def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr, ++ [SDNPHasChain]>; + def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI, +@@ -139,7 +140,7 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, +- [SDNPHasChain, SDNPSideEffect]>; ++ [SDNPHasChain]>; + + //===----------------------------------------------------------------------===// + // Operand and SDNode transformation definitions. +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +index d8850f656..a0136440e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +@@ -12,6 +12,7 @@ + + #include "LoongArchSubtarget.h" + #include "LoongArchFrameLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" + + using namespace llvm; + +@@ -48,8 +49,8 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( + if (!Is64Bit && HasLA64) + report_fatal_error("Feature 64bit should be used for loongarch64 target."); + +- // TODO: ILP32{S,F} LP64{S,F} +- TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; ++ TargetABI = LoongArchABI::computeTargetABI(TT, ABIName); ++ + return *this; + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index aa87638e4..4ff42e3b1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -42,6 +42,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasLaGlobalWithPcrel = false; + bool HasLaGlobalWithAbs = false; + bool HasLaLocalWithAbs = false; ++ bool HasUAL = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -91,6 +92,7 @@ public: + bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; } + bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } ++ bool hasUAL() const { return HasUAL; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index ff0804e2a..ecb68ff40 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -202,5 +202,5 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); +- return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); ++ return new LoongArchAsmBackend(STI, OSABI, 
TT.isArch64Bit(), Options);
+ }
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+index 0d04cecc4..ae9bb8af0 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+@@ -26,11 +26,13 @@ class LoongArchAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo &STI;
+ uint8_t OSABI;
+ bool Is64Bit;
++ const MCTargetOptions &TargetOptions;
+
+ public:
+- LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
+- Is64Bit(Is64Bit) {}
++ LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
++ const MCTargetOptions &Options)
++ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
++ TargetOptions(Options) {}
+ ~LoongArchAsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+@@ -63,6 +65,7 @@ public:
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
++ const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
+ };
+ } // end namespace llvm
+
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+index de2ba2833..28404f04d 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+@@ -15,11 +15,71 @@
+ #include "llvm/ADT/ArrayRef.h"
+ #include "llvm/ADT/Triple.h"
+ #include "llvm/MC/MCSubtargetInfo.h"
++#include "llvm/Support/raw_ostream.h"
+
+ namespace llvm {
+
+ namespace LoongArchABI {
+
++ABI computeTargetABI(const Triple &TT, StringRef ABIName) {
++ ABI ArgProvidedABI = getTargetABI(ABIName);
++ bool Is64Bit = TT.isArch64Bit();
++ ABI TripleABI;
++
++ // Figure out the ABI explicitly requested via the triple's environment type.
++ switch (TT.getEnvironment()) {
++ case llvm::Triple::EnvironmentType::GNUSF:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64S : LoongArchABI::ABI_ILP32S;
++ break;
++ case llvm::Triple::EnvironmentType::GNUF32:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64F : LoongArchABI::ABI_ILP32F;
++ break;
++
++ // Let the fallback case behave like {ILP32,LP64}D.
++ case llvm::Triple::EnvironmentType::GNUF64:
++ default:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
++ break;
++ }
++
++ switch (ArgProvidedABI) {
++ case LoongArchABI::ABI_Unknown:
++ // Fallback to the triple-implied ABI if ABI name is not specified or
++ // invalid.
++ if (!ABIName.empty()) ++ errs() << "'" << ABIName ++ << "' is not a recognized ABI for this target, ignoring and using " ++ "triple-implied ABI\n"; ++ return TripleABI; ++ ++ case LoongArchABI::ABI_ILP32S: ++ case LoongArchABI::ABI_ILP32F: ++ case LoongArchABI::ABI_ILP32D: ++ if (Is64Bit) { ++ errs() << "32-bit ABIs are not supported for 64-bit targets, ignoring " ++ "target-abi and using triple-implied ABI\n"; ++ return TripleABI; ++ } ++ break; ++ ++ case LoongArchABI::ABI_LP64S: ++ case LoongArchABI::ABI_LP64F: ++ case LoongArchABI::ABI_LP64D: ++ if (!Is64Bit) { ++ errs() << "64-bit ABIs are not supported for 32-bit targets, ignoring " ++ "target-abi and using triple-implied ABI\n"; ++ return TripleABI; ++ } ++ break; ++ } ++ ++ if (!ABIName.empty() && TT.hasEnvironment() && ArgProvidedABI != TripleABI) ++ errs() << "warning: triple-implied ABI conflicts with provided target-abi '" ++ << ABIName << "', using target-abi\n"; ++ ++ return ArgProvidedABI; ++} ++ + ABI getTargetABI(StringRef ABIName) { + auto TargetABI = StringSwitch(ABIName) + .Case("ilp32s", ABI_ILP32S) +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +index c5f072677..cdbd1f569 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +@@ -54,6 +54,7 @@ enum ABI { + ABI_Unknown + }; + ++ABI computeTargetABI(const Triple &TT, StringRef ABIName); + ABI getTargetABI(StringRef ABIName); + + // Returns the register used to hold the stack pointer after realignment. +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index 57330dd31..a6b9c0652 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_4: + return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_Data_8: +- return ELF::R_LARCH_64; ++ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +index 3410c8f42..a6e15e094 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +@@ -12,6 +12,7 @@ + + #include "LoongArchELFStreamer.h" + #include "LoongArchAsmBackend.h" ++#include "LoongArchBaseInfo.h" + #include "llvm/BinaryFormat/ELF.h" + #include "llvm/MC/MCAssembler.h" + #include "llvm/MC/MCCodeEmitter.h" +@@ -23,9 +24,10 @@ using namespace llvm; + LoongArchTargetELFStreamer::LoongArchTargetELFStreamer( + MCStreamer &S, const MCSubtargetInfo &STI) + : LoongArchTargetStreamer(S) { +- // FIXME: select appropriate ABI. +- setTargetABI(STI.getTargetTriple().isArch64Bit() ? 
LoongArchABI::ABI_LP64D +- : LoongArchABI::ABI_ILP32D); ++ auto &MAB = static_cast( ++ getStreamer().getAssembler().getBackend()); ++ setTargetABI(LoongArchABI::computeTargetABI( ++ STI.getTargetTriple(), MAB.getTargetOptions().getABIName())); + } + + MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 80ebe0fa5..f6ea8d290 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1448,6 +1448,20 @@ StringRef sys::getHostCPUName() { + return "generic"; + } + } ++#elif defined(__loongarch__) ++StringRef sys::getHostCPUName() { ++ // Use processor id to detect cpu name. ++ uint32_t processor_id; ++ __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); ++ switch (processor_id & 0xff00) { ++ case 0xc000: // Loongson 64bit, 4-issue ++ return "la464"; ++ // TODO: Others. ++ default: ++ break; ++ } ++ return "generic"; ++} + #elif defined(__riscv) + StringRef sys::getHostCPUName() { + #if defined(__linux__) +@@ -1842,6 +1856,23 @@ bool sys::getHostCPUFeatures(StringMap &Features) { + + return true; + } ++#elif defined(__linux__) && defined(__loongarch__) ++#include ++bool sys::getHostCPUFeatures(StringMap &Features) { ++ unsigned long hwcap = getauxval(AT_HWCAP); ++ bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU ++ uint32_t cpucfg2 = 0x2; ++ __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); ++ ++ Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP ++ Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP ++ ++ Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX ++ Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX ++ Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ ++ ++ return true; ++} + #else + bool sys::getHostCPUFeatures(StringMap &Features) { return false; } + #endif +diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +index faa8c314f..772d24c5c 100644 +--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp ++++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +@@ -1,4 +1,4 @@ +-//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=// ++//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
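Stepping back from the renames, the Host.cpp hunks above reduce host detection to two probes: the `cpucfg` instruction (word 0 for the processor ID, word 2 for FP features) and `getauxval(AT_HWCAP)` for kernel-reported extensions. A minimal standalone sketch of the same scheme, assuming a LoongArch Linux host (it does not build elsewhere) and reusing the magic numbers from the hunks above:

#include <sys/auxv.h>
#include <cstdint>
#include <cstdio>

static const char *detectHostCPU() {
  uint32_t Prid;
  // CPUCFG word 0 holds the processor ID; 0xc000 identifies the LA464 core.
  __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(Prid));
  return (Prid & 0xff00) == 0xc000 ? "la464" : "generic";
}

static bool hostHasLSX() {
  // Bit 4 of AT_HWCAP is HWCAP_LOONGARCH_LSX, as in getHostCPUFeatures above.
  return (getauxval(AT_HWCAP) & (1UL << 4)) != 0;
}

int main() {
  std::printf("cpu=%s lsx=%d\n", detectHostCPU(), hostHasLSX());
}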
+@@ -27,12 +27,11 @@ const ArchInfo AllArchs[] = { + #include "llvm/TargetParser/LoongArchTargetParser.def" + }; + +-LoongArch::ArchKind LoongArch::parseArch(StringRef Arch) { ++bool LoongArch::isValidArchName(StringRef Arch) { + for (const auto A : AllArchs) + if (A.Name == Arch) +- return A.Kind; +- +- return LoongArch::ArchKind::AK_INVALID; ++ return true; ++ return false; + } + + bool LoongArch::getArchFeatures(StringRef Arch, +@@ -40,10 +39,22 @@ bool LoongArch::getArchFeatures(StringRef Arch, + for (const auto A : AllArchs) { + if (A.Name == Arch) { + for (const auto F : AllFeatures) +- if ((A.Features & F.Kind) == F.Kind && F.Kind != FK_INVALID) ++ if ((A.Features & F.Kind) == F.Kind) + Features.push_back(F.Name); + return true; + } + } + return false; + } ++ ++bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } ++ ++void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { ++ for (const auto A : AllArchs) ++ Values.emplace_back(A.Name); ++} ++ ++StringRef LoongArch::getDefaultArch(bool Is64Bit) { ++ // TODO: use a real 32-bit arch name. ++ return Is64Bit ? "loongarch64" : ""; ++} +diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 599eeeabc..367a2bef2 100644 +--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -492,7 +492,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsMIPS64 = TargetTriple.isMIPS64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; +- bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64; ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; + bool IsWindows = TargetTriple.isOSWindows(); + bool IsFuchsia = TargetTriple.isOSFuchsia(); +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index fe8b8ce0d..603fa97e1 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -434,6 +434,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { + 0x0200000000000, // OriginBase + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x500000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x100000000000, // OriginBase ++}; ++ + // aarch64 FreeBSD + static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask +@@ -491,6 +499,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { + &Linux_AArch64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = { ++ nullptr, ++ &Linux_LoongArch64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +@@ -537,6 +550,7 @@ private: + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; + friend struct VarArgSystemZHelper; ++ friend struct VarArgLoongArch64Helper; + + void initializeModule(Module &M); + void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI); +@@ -986,6 +1000,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::aarch64_be: + MapParams = 
Linux_ARM_MemoryMapParams.bits64; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LoongArch_MemoryMapParams.bits64; ++ break; + default: + report_fatal_error("unsupported architecture"); + } +@@ -5709,6 +5726,123 @@ struct VarArgSystemZHelper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ AllocaInst *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) ++ : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (Value *A : ++ llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) { ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. ++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = ++ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. 
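++      // The backup is CopySize bytes, i.e. the total va_arg shadow size
++      // that visitCallBase stored through VAArgOverflowSizeTLS; the memcpy
++      // below clamps its source length to kParamTLSSize with umin so it
++      // never reads past the end of __msan_va_arg_tls.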
++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ VAArgTLSCopy->setAlignment(kShadowTLSAlignment); ++ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()), ++ CopySize, kShadowTLSAlignment, false); ++ ++ Value *SrcSize = IRB.CreateBinaryIntrinsic( ++ Intrinsic::umin, CopySize, ++ ConstantInt::get(MS.IntptrTy, kParamTLSSize)); ++ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS, ++ kShadowTLSAlignment, SrcSize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ NextNodeIRBuilder IRB(OrigInst); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// A no-op implementation of VarArgHelper. + struct VarArgNoOpHelper : public VarArgHelper { + VarArgNoOpHelper(Function &F, MemorySanitizer &MS, +@@ -5741,6 +5875,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +new file mode 100644 +index 000000000..08fff9f8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +@@ -0,0 +1,403 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s | FileCheck %s ++ ++;; This file contains tests that should have identical output for all ABIs, i.e. ++;; where no arguments are passed via floating point registers. ++ ++;; Check that on LA64, i128 is passed in a pair of GPRs. 
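Before the first test, a C-level cross-check of what it encodes may help: on the LP64 ABIs an i128 simply occupies two consecutive GPRs, with no even-register alignment requirement, so with $a0 taken by the i64 the i128 lands in $a1/$a2. A sketch, assuming a loongarch64 compiler with __int128 support (the function name is illustrative):

#include <cstdint>

// Mirrors @callee_i128_in_regs below: a arrives in $a0, b in $a1/$a2,
// and truncating b keeps only the $a1 half before the add.
uint64_t i128_in_regs(uint64_t a, __int128 b) {
  return a + static_cast<uint64_t>(b);
}

The IR test that follows checks the same lowering at the llc level.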
++define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { ++; CHECK-LABEL: callee_i128_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %b_trunc = trunc i128 %b to i64 ++ %1 = add i64 %a, %b_trunc ++ ret i64 %1 ++} ++ ++define i64 @caller_i128_in_regs() nounwind { ++; CHECK-LABEL: caller_i128_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: move $a2, $zero ++; CHECK-NEXT: bl %plt(callee_i128_in_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) ++ ret i64 %1 ++} ++ ++;; Check that the stack is used once the GPRs are exhausted. ++define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { ++; CHECK-LABEL: callee_many_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $t0, $sp, 0 ++; CHECK-NEXT: xor $a5, $a5, $t0 ++; CHECK-NEXT: xor $a4, $a4, $a7 ++; CHECK-NEXT: or $a4, $a4, $a5 ++; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; CHECK-NEXT: andi $a0, $a0, 255 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: add.d $a0, $a0, $a3 ++; CHECK-NEXT: sltui $a1, $a4, 1 ++; CHECK-NEXT: add.d $a0, $a1, $a0 ++; CHECK-NEXT: add.d $a0, $a0, $a6 ++; CHECK-NEXT: ld.d $a1, $sp, 8 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %a_ext = zext i8 %a to i64 ++ %b_ext = zext i16 %b to i64 ++ %c_ext = zext i32 %c to i64 ++ %1 = add i64 %a_ext, %b_ext ++ %2 = add i64 %1, %c_ext ++ %3 = add i64 %2, %d ++ %4 = icmp eq i128 %e, %g ++ %5 = zext i1 %4 to i64 ++ %6 = add i64 %5, %3 ++ %7 = add i64 %6, %f ++ %8 = add i64 %7, %h ++ ret i64 %8 ++} ++ ++define i64 @caller_many_scalars() nounwind { ++; CHECK-LABEL: caller_many_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -32 ++; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 8 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: st.d $zero, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: ori $a2, $zero, 3 ++; CHECK-NEXT: ori $a3, $zero, 4 ++; CHECK-NEXT: ori $a4, $zero, 5 ++; CHECK-NEXT: ori $a6, $zero, 6 ++; CHECK-NEXT: ori $a7, $zero, 7 ++; CHECK-NEXT: move $a5, $zero ++; CHECK-NEXT: bl %plt(callee_many_scalars) ++; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 32 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) ++ ret i64 %1 ++} ++ ++;; Check that i256 is passed indirectly. 
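Unlike i128, an i256 has no register form: the caller spills the value into its own frame and passes only its address, which is why the CHECK lines below show stores relative to $sp followed by addi.d instructions computing $a0/$a1. Conceptually the call lowers to the following hand-written equivalent, where I256 is an illustrative stand-in for the i256:

#include <cstdint>

struct I256 { uint64_t w[4]; }; // stand-in for the test's i256

// What @callee_large_scalars effectively receives: two addresses.
static bool equalIndirect(const I256 *a, const I256 *b) {
  uint64_t diff = 0;
  for (int i = 0; i < 4; ++i)
    diff |= a->w[i] ^ b->w[i]; // same xor/or reduction as the CHECK lines
  return diff == 0;
}

int caller() {
  I256 one = {{1, 0, 0, 0}}, two = {{2, 0, 0, 0}}; // spilled to the frame
  return equalIndirect(&one, &two);                // addresses in $a0/$a1
}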
++ ++define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ++; CHECK-LABEL: callee_large_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a2, $a1, 24 ++; CHECK-NEXT: ld.d $a3, $a0, 24 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a3, $a1, 8 ++; CHECK-NEXT: ld.d $a4, $a0, 8 ++; CHECK-NEXT: xor $a3, $a4, $a3 ++; CHECK-NEXT: or $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a3, $a1, 16 ++; CHECK-NEXT: ld.d $a4, $a0, 16 ++; CHECK-NEXT: xor $a3, $a4, $a3 ++; CHECK-NEXT: ld.d $a1, $a1, 0 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: xor $a0, $a0, $a1 ++; CHECK-NEXT: or $a0, $a0, $a3 ++; CHECK-NEXT: or $a0, $a0, $a2 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ret ++ %1 = icmp eq i256 %a, %b ++ %2 = zext i1 %1 to i64 ++ ret i64 %2 ++} ++ ++define i64 @caller_large_scalars() nounwind { ++; CHECK-LABEL: caller_large_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: st.d $zero, $sp, 24 ++; CHECK-NEXT: st.d $zero, $sp, 16 ++; CHECK-NEXT: st.d $zero, $sp, 8 ++; CHECK-NEXT: st.d $zero, $sp, 56 ++; CHECK-NEXT: st.d $zero, $sp, 48 ++; CHECK-NEXT: st.d $zero, $sp, 40 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: st.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a1, $sp, 0 ++; CHECK-NEXT: bl %plt(callee_large_scalars) ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_large_scalars(i256 1, i256 2) ++ ret i64 %1 ++} ++ ++;; Check that arguments larger than 2*GRLen are handled correctly when their ++;; address is passed on the stack rather than in memory. ++ ++;; Must keep define on a single line due to an update_llc_test_checks.py limitation ++define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { ++; CHECK-LABEL: callee_large_scalars_exhausted_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a0, $sp, 8 ++; CHECK-NEXT: ld.d $a1, $a0, 24 ++; CHECK-NEXT: ld.d $a2, $a7, 24 ++; CHECK-NEXT: xor $a1, $a2, $a1 ++; CHECK-NEXT: ld.d $a2, $a0, 8 ++; CHECK-NEXT: ld.d $a3, $a7, 8 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: or $a1, $a2, $a1 ++; CHECK-NEXT: ld.d $a2, $a0, 16 ++; CHECK-NEXT: ld.d $a3, $a7, 16 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: ld.d $a3, $a7, 0 ++; CHECK-NEXT: xor $a0, $a3, $a0 ++; CHECK-NEXT: or $a0, $a0, $a2 ++; CHECK-NEXT: or $a0, $a0, $a1 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ret ++ %1 = icmp eq i256 %h, %j ++ %2 = zext i1 %1 to i64 ++ ret i64 %2 ++} ++ ++define i64 @caller_large_scalars_exhausted_regs() nounwind { ++; CHECK-LABEL: caller_large_scalars_exhausted_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -96 ++; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 16 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: ori $a0, $zero, 9 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 10 ++; CHECK-NEXT: st.d $a0, $sp, 16 ++; CHECK-NEXT: st.d $zero, $sp, 40 ++; CHECK-NEXT: st.d $zero, $sp, 32 ++; CHECK-NEXT: st.d $zero, $sp, 24 ++; CHECK-NEXT: st.d $zero, $sp, 72 ++; CHECK-NEXT: st.d $zero, $sp, 64 ++; CHECK-NEXT: st.d $zero, $sp, 56 ++; CHECK-NEXT: ori $a0, $zero, 8 ++; CHECK-NEXT: st.d $a0, $sp, 48 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: ori $a2, $zero, 3 ++; 
CHECK-NEXT: ori $a3, $zero, 4 ++; CHECK-NEXT: ori $a4, $zero, 5 ++; CHECK-NEXT: ori $a5, $zero, 6 ++; CHECK-NEXT: ori $a6, $zero, 7 ++; CHECK-NEXT: addi.d $a7, $sp, 48 ++; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 96 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_large_scalars_exhausted_regs( ++ i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, ++ i256 10) ++ ret i64 %1 ++} ++ ++;; Check large struct arguments, which are passed byval ++ ++%struct.large = type { i64, i64, i64, i64 } ++ ++define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { ++; CHECK-LABEL: callee_large_struct: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a1, $a0, 24 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 ++ %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 ++ %3 = load i64, ptr %1 ++ %4 = load i64, ptr %2 ++ %5 = add i64 %3, %4 ++ ret i64 %5 ++} ++ ++define i64 @caller_large_struct() nounwind { ++; CHECK-LABEL: caller_large_struct: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: st.d $a0, $sp, 40 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: st.d $a0, $sp, 48 ++; CHECK-NEXT: st.d $a0, $sp, 16 ++; CHECK-NEXT: ori $a0, $zero, 3 ++; CHECK-NEXT: st.d $a0, $sp, 56 ++; CHECK-NEXT: st.d $a0, $sp, 24 ++; CHECK-NEXT: ori $a0, $zero, 4 ++; CHECK-NEXT: st.d $a0, $sp, 64 ++; CHECK-NEXT: st.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a0, $sp, 8 ++; CHECK-NEXT: bl %plt(callee_large_struct) ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: ret ++ %ls = alloca %struct.large, align 8 ++ %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 ++ store i64 1, ptr %a ++ %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 ++ store i64 2, ptr %b ++ %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 ++ store i64 3, ptr %c ++ %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 ++ store i64 4, ptr %d ++ %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) ++ ret i64 %1 ++} ++ ++;; Check return scalar which size is 2*GRLen. ++ ++define i128 @callee_small_scalar_ret() nounwind { ++; CHECK-LABEL: callee_small_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $a0, $zero, -1 ++; CHECK-NEXT: move $a1, $a0 ++; CHECK-NEXT: ret ++ ret i128 -1 ++} ++ ++define i64 @caller_small_scalar_ret() nounwind { ++; CHECK-LABEL: caller_small_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_small_scalar_ret) ++; CHECK-NEXT: addi.w $a2, $zero, -2 ++; CHECK-NEXT: xor $a0, $a0, $a2 ++; CHECK-NEXT: orn $a0, $a0, $a1 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i128 @callee_small_scalar_ret() ++ %2 = icmp eq i128 -2, %1 ++ %3 = zext i1 %2 to i64 ++ ret i64 %3 ++} ++ ++;; Check return struct which size is 2*GRLen. 
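An aggregate of exactly 2*GRLen comes back in $a0/$a1 with no memory round trip, as the pair of tests below demonstrates. The same contract in C++ terms, as a sketch (Small mirrors %struct.small; the helper names are made up):

#include <cstdint>

struct Small { uint64_t first; void *second; }; // mirrors %struct.small

// Returned in $a0 (first) and $a1 (second); no hidden sret pointer.
Small makeSmall() { return Small{1, nullptr}; }

uint64_t useSmall() {
  Small s = makeSmall(); // reads the pair straight out of $a0/$a1
  return s.first + reinterpret_cast<uintptr_t>(s.second);
}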
++ ++%struct.small = type { i64, ptr } ++ ++define %struct.small @callee_small_struct_ret() nounwind { ++; CHECK-LABEL: callee_small_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: move $a1, $zero ++; CHECK-NEXT: ret ++ ret %struct.small { i64 1, ptr null } ++} ++ ++define i64 @caller_small_struct_ret() nounwind { ++; CHECK-LABEL: caller_small_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_small_struct_ret) ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call %struct.small @callee_small_struct_ret() ++ %2 = extractvalue %struct.small %1, 0 ++ %3 = extractvalue %struct.small %1, 1 ++ %4 = ptrtoint ptr %3 to i64 ++ %5 = add i64 %2, %4 ++ ret i64 %5 ++} ++ ++;; Check return scalar which size is more than 2*GRLen. ++ ++define i256 @callee_large_scalar_ret() nounwind { ++; CHECK-LABEL: callee_large_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $a1, $zero, -1 ++; CHECK-NEXT: st.d $a1, $a0, 24 ++; CHECK-NEXT: st.d $a1, $a0, 16 ++; CHECK-NEXT: st.d $a1, $a0, 8 ++; CHECK-NEXT: lu12i.w $a1, -30141 ++; CHECK-NEXT: ori $a1, $a1, 747 ++; CHECK-NEXT: st.d $a1, $a0, 0 ++; CHECK-NEXT: ret ++ ret i256 -123456789 ++} ++ ++define void @caller_large_scalar_ret() nounwind { ++; CHECK-LABEL: caller_large_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -48 ++; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bl %plt(callee_large_scalar_ret) ++; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 48 ++; CHECK-NEXT: ret ++ %1 = call i256 @callee_large_scalar_ret() ++ ret void ++} ++ ++;; Check return struct which size is more than 2*GRLen. 
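Once the return value outgrows 2*GRLen the roles flip: the caller reserves the storage in its own frame and passes the address as a hidden first argument in $a0, which is exactly what the sret attribute below encodes. A C++ rendition of the shape, again only a sketch with illustrative names:

#include <cstdint>

struct Large { uint64_t a, b, c, d; }; // mirrors %struct.large

// Compiled for an LP64 ABI this returns through a hidden pointer in $a0,
// like @callee_large_struct_ret below.
Large makeLarge() { return Large{1, 2, 3, 4}; }

uint64_t useLarge() {
  Large l = makeLarge(); // the caller's frame provides the sret buffer
  return l.a + l.d;
}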
++ ++define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { ++; CHECK-LABEL: callee_large_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $a1, $zero, 4 ++; CHECK-NEXT: st.d $a1, $a0, 24 ++; CHECK-NEXT: ori $a1, $zero, 3 ++; CHECK-NEXT: st.d $a1, $a0, 16 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: st.d $a1, $a0, 8 ++; CHECK-NEXT: ori $a1, $zero, 1 ++; CHECK-NEXT: st.d $a1, $a0, 0 ++; CHECK-NEXT: ret ++ %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 ++ store i64 1, ptr %a, align 4 ++ %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 ++ store i64 2, ptr %b, align 4 ++ %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 ++ store i64 3, ptr %c, align 4 ++ %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 ++ store i64 4, ptr %d, align 4 ++ ret void ++} ++ ++define i64 @caller_large_struct_ret() nounwind { ++; CHECK-LABEL: caller_large_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -48 ++; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 8 ++; CHECK-NEXT: bl %plt(callee_large_struct_ret) ++; CHECK-NEXT: ld.d $a0, $sp, 32 ++; CHECK-NEXT: ld.d $a1, $sp, 8 ++; CHECK-NEXT: add.d $a0, $a1, $a0 ++; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 48 ++; CHECK-NEXT: ret ++ %1 = alloca %struct.large ++ call void @callee_large_struct_ret(ptr sret(%struct.large) %1) ++ %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 ++ %3 = load i64, ptr %2 ++ %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 ++ %5 = load i64, ptr %4 ++ %6 = add i64 %3, %5 ++ ret i64 %6 ++} +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll +index ae2ce7291..ceb38876c 100644 +--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll +@@ -2,406 +2,7 @@ + ; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ + ; RUN: | FileCheck %s + +-;; Check that on LA64, i128 is passed in a pair of GPRs. +-define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { +-; CHECK-LABEL: callee_i128_in_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %b_trunc = trunc i128 %b to i64 +- %1 = add i64 %a, %b_trunc +- ret i64 %1 +-} +- +-define i64 @caller_i128_in_regs() nounwind { +-; CHECK-LABEL: caller_i128_in_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: move $a2, $zero +-; CHECK-NEXT: bl %plt(callee_i128_in_regs) +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) +- ret i64 %1 +-} +- +-;; Check that the stack is used once the GPRs are exhausted. 
+-define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { +-; CHECK-LABEL: callee_many_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $t0, $sp, 0 +-; CHECK-NEXT: xor $a5, $a5, $t0 +-; CHECK-NEXT: xor $a4, $a4, $a7 +-; CHECK-NEXT: or $a4, $a4, $a5 +-; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; CHECK-NEXT: andi $a0, $a0, 255 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: add.d $a0, $a0, $a3 +-; CHECK-NEXT: sltui $a1, $a4, 1 +-; CHECK-NEXT: add.d $a0, $a1, $a0 +-; CHECK-NEXT: add.d $a0, $a0, $a6 +-; CHECK-NEXT: ld.d $a1, $sp, 8 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %a_ext = zext i8 %a to i64 +- %b_ext = zext i16 %b to i64 +- %c_ext = zext i32 %c to i64 +- %1 = add i64 %a_ext, %b_ext +- %2 = add i64 %1, %c_ext +- %3 = add i64 %2, %d +- %4 = icmp eq i128 %e, %g +- %5 = zext i1 %4 to i64 +- %6 = add i64 %5, %3 +- %7 = add i64 %6, %f +- %8 = add i64 %7, %h +- ret i64 %8 +-} +- +-define i64 @caller_many_scalars() nounwind { +-; CHECK-LABEL: caller_many_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -32 +-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 8 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: st.d $zero, $sp, 0 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: ori $a4, $zero, 5 +-; CHECK-NEXT: ori $a6, $zero, 6 +-; CHECK-NEXT: ori $a7, $zero, 7 +-; CHECK-NEXT: move $a5, $zero +-; CHECK-NEXT: bl %plt(callee_many_scalars) +-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 32 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) +- ret i64 %1 +-} +- +-;; Check that i256 is passed indirectly. 
+- +-define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { +-; CHECK-LABEL: callee_large_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a2, $a1, 24 +-; CHECK-NEXT: ld.d $a3, $a0, 24 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a3, $a1, 8 +-; CHECK-NEXT: ld.d $a4, $a0, 8 +-; CHECK-NEXT: xor $a3, $a4, $a3 +-; CHECK-NEXT: or $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a3, $a1, 16 +-; CHECK-NEXT: ld.d $a4, $a0, 16 +-; CHECK-NEXT: xor $a3, $a4, $a3 +-; CHECK-NEXT: ld.d $a1, $a1, 0 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: xor $a0, $a0, $a1 +-; CHECK-NEXT: or $a0, $a0, $a3 +-; CHECK-NEXT: or $a0, $a0, $a2 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ret +- %1 = icmp eq i256 %a, %b +- %2 = zext i1 %1 to i64 +- ret i64 %2 +-} +- +-define i64 @caller_large_scalars() nounwind { +-; CHECK-LABEL: caller_large_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -80 +-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.d $a0, $sp, 0 +-; CHECK-NEXT: st.d $zero, $sp, 24 +-; CHECK-NEXT: st.d $zero, $sp, 16 +-; CHECK-NEXT: st.d $zero, $sp, 8 +-; CHECK-NEXT: st.d $zero, $sp, 56 +-; CHECK-NEXT: st.d $zero, $sp, 48 +-; CHECK-NEXT: st.d $zero, $sp, 40 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a1, $sp, 0 +-; CHECK-NEXT: bl %plt(callee_large_scalars) +-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 80 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_large_scalars(i256 1, i256 2) +- ret i64 %1 +-} +- +-;; Check that arguments larger than 2*GRLen are handled correctly when their +-;; address is passed on the stack rather than in memory. +- +-;; Must keep define on a single line due to an update_llc_test_checks.py limitation +-define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { +-; CHECK-LABEL: callee_large_scalars_exhausted_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a0, $sp, 8 +-; CHECK-NEXT: ld.d $a1, $a0, 24 +-; CHECK-NEXT: ld.d $a2, $a7, 24 +-; CHECK-NEXT: xor $a1, $a2, $a1 +-; CHECK-NEXT: ld.d $a2, $a0, 8 +-; CHECK-NEXT: ld.d $a3, $a7, 8 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: or $a1, $a2, $a1 +-; CHECK-NEXT: ld.d $a2, $a0, 16 +-; CHECK-NEXT: ld.d $a3, $a7, 16 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: ld.d $a3, $a7, 0 +-; CHECK-NEXT: xor $a0, $a3, $a0 +-; CHECK-NEXT: or $a0, $a0, $a2 +-; CHECK-NEXT: or $a0, $a0, $a1 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ret +- %1 = icmp eq i256 %h, %j +- %2 = zext i1 %1 to i64 +- ret i64 %2 +-} +- +-define i64 @caller_large_scalars_exhausted_regs() nounwind { +-; CHECK-LABEL: caller_large_scalars_exhausted_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -96 +-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 16 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: ori $a0, $zero, 9 +-; CHECK-NEXT: st.d $a0, $sp, 0 +-; CHECK-NEXT: ori $a0, $zero, 10 +-; CHECK-NEXT: st.d $a0, $sp, 16 +-; CHECK-NEXT: st.d $zero, $sp, 40 +-; CHECK-NEXT: st.d $zero, $sp, 32 +-; CHECK-NEXT: st.d $zero, $sp, 24 +-; CHECK-NEXT: st.d $zero, $sp, 72 +-; CHECK-NEXT: st.d $zero, $sp, 64 +-; CHECK-NEXT: st.d $zero, $sp, 56 +-; CHECK-NEXT: ori $a0, $zero, 8 +-; CHECK-NEXT: st.d $a0, $sp, 48 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; 
CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: ori $a4, $zero, 5 +-; CHECK-NEXT: ori $a5, $zero, 6 +-; CHECK-NEXT: ori $a6, $zero, 7 +-; CHECK-NEXT: addi.d $a7, $sp, 48 +-; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) +-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 96 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_large_scalars_exhausted_regs( +- i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, +- i256 10) +- ret i64 %1 +-} +- +-;; Check large struct arguments, which are passed byval +- +-%struct.large = type { i64, i64, i64, i64 } +- +-define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { +-; CHECK-LABEL: callee_large_struct: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a1, $a0, 24 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 +- %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 +- %3 = load i64, ptr %1 +- %4 = load i64, ptr %2 +- %5 = add i64 %3, %4 +- ret i64 %5 +-} +- +-define i64 @caller_large_struct() nounwind { +-; CHECK-LABEL: caller_large_struct: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -80 +-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.d $a0, $sp, 40 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.d $a0, $sp, 48 +-; CHECK-NEXT: st.d $a0, $sp, 16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.d $a0, $sp, 56 +-; CHECK-NEXT: st.d $a0, $sp, 24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.d $a0, $sp, 64 +-; CHECK-NEXT: st.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a0, $sp, 8 +-; CHECK-NEXT: bl %plt(callee_large_struct) +-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 80 +-; CHECK-NEXT: ret +- %ls = alloca %struct.large, align 8 +- %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 +- store i64 1, ptr %a +- %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 +- store i64 2, ptr %b +- %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 +- store i64 3, ptr %c +- %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 +- store i64 4, ptr %d +- %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) +- ret i64 %1 +-} +- +-;; Check return scalar which size is 2*GRLen. +- +-define i128 @callee_small_scalar_ret() nounwind { +-; CHECK-LABEL: callee_small_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -1 +-; CHECK-NEXT: move $a1, $a0 +-; CHECK-NEXT: ret +- ret i128 -1 +-} +- +-define i64 @caller_small_scalar_ret() nounwind { +-; CHECK-LABEL: caller_small_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: bl %plt(callee_small_scalar_ret) +-; CHECK-NEXT: addi.w $a2, $zero, -2 +-; CHECK-NEXT: xor $a0, $a0, $a2 +-; CHECK-NEXT: orn $a0, $a0, $a1 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call i128 @callee_small_scalar_ret() +- %2 = icmp eq i128 -2, %1 +- %3 = zext i1 %2 to i64 +- ret i64 %3 +-} +- +-;; Check return struct which size is 2*GRLen. 
+- +-%struct.small = type { i64, ptr } +- +-define %struct.small @callee_small_struct_ret() nounwind { +-; CHECK-LABEL: callee_small_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: move $a1, $zero +-; CHECK-NEXT: ret +- ret %struct.small { i64 1, ptr null } +-} +- +-define i64 @caller_small_struct_ret() nounwind { +-; CHECK-LABEL: caller_small_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: bl %plt(callee_small_struct_ret) +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call %struct.small @callee_small_struct_ret() +- %2 = extractvalue %struct.small %1, 0 +- %3 = extractvalue %struct.small %1, 1 +- %4 = ptrtoint ptr %3 to i64 +- %5 = add i64 %2, %4 +- ret i64 %5 +-} +- +-;; Check return scalar which size is more than 2*GRLen. +- +-define i256 @callee_large_scalar_ret() nounwind { +-; CHECK-LABEL: callee_large_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a1, $zero, -1 +-; CHECK-NEXT: st.d $a1, $a0, 24 +-; CHECK-NEXT: st.d $a1, $a0, 16 +-; CHECK-NEXT: st.d $a1, $a0, 8 +-; CHECK-NEXT: lu12i.w $a1, -30141 +-; CHECK-NEXT: ori $a1, $a1, 747 +-; CHECK-NEXT: st.d $a1, $a0, 0 +-; CHECK-NEXT: ret +- ret i256 -123456789 +-} +- +-define void @caller_large_scalar_ret() nounwind { +-; CHECK-LABEL: caller_large_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -48 +-; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 0 +-; CHECK-NEXT: bl %plt(callee_large_scalar_ret) +-; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 48 +-; CHECK-NEXT: ret +- %1 = call i256 @callee_large_scalar_ret() +- ret void +-} +- +-;; Check return struct which size is more than 2*GRLen. 
+- +-define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { +-; CHECK-LABEL: callee_large_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a1, $zero, 4 +-; CHECK-NEXT: st.w $a1, $a0, 24 +-; CHECK-NEXT: ori $a1, $zero, 3 +-; CHECK-NEXT: st.w $a1, $a0, 16 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: st.w $a1, $a0, 8 +-; CHECK-NEXT: st.w $zero, $a0, 28 +-; CHECK-NEXT: st.w $zero, $a0, 20 +-; CHECK-NEXT: st.w $zero, $a0, 12 +-; CHECK-NEXT: st.w $zero, $a0, 4 +-; CHECK-NEXT: ori $a1, $zero, 1 +-; CHECK-NEXT: st.w $a1, $a0, 0 +-; CHECK-NEXT: ret +- %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 +- store i64 1, ptr %a, align 4 +- %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 +- store i64 2, ptr %b, align 4 +- %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 +- store i64 3, ptr %c, align 4 +- %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 +- store i64 4, ptr %d, align 4 +- ret void +-} +- +-define i64 @caller_large_struct_ret() nounwind { +-; CHECK-LABEL: caller_large_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -48 +-; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 8 +-; CHECK-NEXT: bl %plt(callee_large_struct_ret) +-; CHECK-NEXT: ld.d $a0, $sp, 32 +-; CHECK-NEXT: ld.d $a1, $sp, 8 +-; CHECK-NEXT: add.d $a0, $a1, $a0 +-; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 48 +-; CHECK-NEXT: ret +- %1 = alloca %struct.large +- call void @callee_large_struct_ret(ptr sret(%struct.large) %1) +- %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 +- %3 = load i64, ptr %2 +- %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 +- %5 = load i64, ptr %4 +- %6 = add i64 %3, %5 +- ret i64 %6 +-} ++;; This file contains specific tests for the lp64d ABI. + + ;; Check pass floating-point arguments whith FPRs. 
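The hunk that follows only renumbers constant-pool labels: with the eighteen functions above moved out to calling-conv-common.ll, caller_double_in_gpr_exhausted_fprs drops from function 21 to function 3 in this file, so .LCPI21_* becomes .LCPI3_*. The test itself illustrates the lp64d rule that the first eight floating-point arguments travel in $fa0-$fa7 and the ninth falls back to a GPR, hence the movgr2fr.d in its CHECK lines. In source terms the scenario is roughly this sketch, not the test's actual source:

// Under lp64d, a..h occupy $fa0-$fa7; i arrives in a GPR and the callee
// moves it into an FPR with movgr2fr.d before using it.
double doubleInGPRExhaustedFPRs(double a, double b, double c, double d,
                                double e, double f, double g, double h,
                                double i) {
  return i;
}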
+ +@@ -462,26 +63,26 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) + ; CHECK-NEXT: fld.d $fa1, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_1) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_1) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_1) + ; CHECK-NEXT: fld.d $fa2, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_2) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_2) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_2) + ; CHECK-NEXT: fld.d $fa3, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_3) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_3) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_3) + ; CHECK-NEXT: fld.d $fa4, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_4) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_4) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_4) + ; CHECK-NEXT: fld.d $fa5, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_5) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_5) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_5) + ; CHECK-NEXT: fld.d $fa6, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_6) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_6) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_6) + ; CHECK-NEXT: fld.d $fa7, $a0, 0 + ; CHECK-NEXT: addi.d $a0, $zero, 1 + ; CHECK-NEXT: movgr2fr.d $fa0, $a0 +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll +new file mode 100644 +index 000000000..d738c066e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s ++ ++;; This file contains specific tests for the lp64s ABI. 
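Under lp64s there are no floating-point argument registers at all: a float travels as its 32-bit IEEE pattern in a GPR and any arithmetic on it becomes a libcall, which is why the first test below zero-extends $a1 with bstrpick.d and then calls __fixsfdi. A source-level sketch of that first test, assuming a soft-float lp64s build (the name is illustrative):

#include <cstdint>

// With --target-abi=lp64s, b arrives in $a1 as raw bits; the float-to-i64
// conversion lowers to a call to the __fixsfdi runtime routine.
uint64_t floatInRegs(uint64_t a, float b) {
  return a + static_cast<int64_t>(b);
}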
++ ++define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { ++; CHECK-LABEL: callee_float_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill ++; CHECK-NEXT: move $fp, $a0 ++; CHECK-NEXT: bstrpick.d $a0, $a1, 31, 0 ++; CHECK-NEXT: bl %plt(__fixsfdi) ++; CHECK-NEXT: add.d $a0, $fp, $a0 ++; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %b_fptosi = fptosi float %b to i64 ++ %1 = add i64 %a, %b_fptosi ++ ret i64 %1 ++} ++ ++define i64 @caller_float_in_regs() nounwind { ++; CHECK-LABEL: caller_float_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: lu12i.w $a1, 262144 ++; CHECK-NEXT: bl %plt(callee_float_in_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) ++ ret i64 %1 ++} ++ ++define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { ++; CHECK-LABEL: callee_float_on_stack: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $a0, $sp, 0 ++; CHECK-NEXT: ret ++ %1 = trunc i128 %d to i64 ++ %2 = bitcast float %e to i32 ++ %3 = sext i32 %2 to i64 ++ %4 = add i64 %1, %3 ++ ret i64 %3 ++} ++ ++define i64 @caller_float_on_stack() nounwind { ++; CHECK-LABEL: caller_float_on_stack: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: lu12i.w $a0, 264704 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a2, $zero, 2 ++; CHECK-NEXT: ori $a4, $zero, 3 ++; CHECK-NEXT: ori $a6, $zero, 4 ++; CHECK-NEXT: move $a1, $zero ++; CHECK-NEXT: move $a3, $zero ++; CHECK-NEXT: move $a5, $zero ++; CHECK-NEXT: move $a7, $zero ++; CHECK-NEXT: bl %plt(callee_float_on_stack) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) ++ ret i64 %1 ++} ++ ++define float @callee_tiny_scalar_ret() nounwind { ++; CHECK-LABEL: callee_tiny_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $a0, 260096 ++; CHECK-NEXT: ret ++ ret float 1.0 ++} ++ ++define i64 @caller_tiny_scalar_ret() nounwind { ++; CHECK-LABEL: caller_tiny_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_tiny_scalar_ret) ++; CHECK-NEXT: addi.w $a0, $a0, 0 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call float @callee_tiny_scalar_ret() ++ %2 = bitcast float %1 to i32 ++ %3 = sext i32 %2 to i64 ++ ret i64 %3 ++} +diff --git a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll +new file mode 100644 +index 000000000..b5435fb90 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll +@@ -0,0 +1,7 @@ ++; RUN: llc < %s --mtriple=loongarch64 --mattr=+64bit --mcpu=invalidcpu 2>&1 | FileCheck %s ++ ++; CHECK: {{.*}} is not a recognized processor for this target ++ ++define void @f() { ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll 
+new file mode 100644
+index 000000000..35945ae4d
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/cpus.ll
+@@ -0,0 +1,20 @@
++;; This tests that llc accepts all valid LoongArch CPUs.
++;; Note the 'generic' names have been tested in cpu-name-generic.ll.
++
++; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s
++; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s
++; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s
++
++; CHECK-NOT: {{.*}} is not a recognized processor for this target
++
++define void @f() {
++  ret void
++}
++
++define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" {
++  ret void
++}
++
++define void @tune_cpu_la464() "tune-cpu"="la464" {
++  ret void
++}
+diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll
+index d55b9b726..c004d1f9c 100644
+--- a/llvm/test/CodeGen/LoongArch/e_flags.ll
++++ b/llvm/test/CodeGen/LoongArch/e_flags.ll
+@@ -1,15 +1,32 @@
+ ; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32
+ ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32s -o %t-ilp32s
++; RUN: llvm-readelf -h %t-ilp32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32f -o %t-ilp32f
++; RUN: llvm-readelf -h %t-ilp32f | FileCheck %s --check-prefixes=ILP32,ABI-F --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d
++; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
++
+ ; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64
+ ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
+-;; Note that we have not support the -target-abi option to select specific ABI.
+-;; See comments in LoongArchELFStreamer.cpp. So here we only check the default behaviour.
+-;; After -target-abi is supported, we can add more tests.
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64s -o %t-lp64s
++; RUN: llvm-readelf -h %t-lp64s | FileCheck %s --check-prefixes=LP64,ABI-S --match-full-lines
++
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64f -o %t-lp64f
++; RUN: llvm-readelf -h %t-lp64f | FileCheck %s --check-prefixes=LP64,ABI-F --match-full-lines
++
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --mattr=+d --target-abi=lp64d -o %t-lp64d
++; RUN: llvm-readelf -h %t-lp64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
+ ; LP64: Class: ELF64
+ ; ILP32: Class: ELF32
+
++; ABI-S: Flags: 0x41, SOFT-FLOAT, OBJ-v1
++; ABI-F: Flags: 0x42, SINGLE-FLOAT, OBJ-v1
+ ; ABI-D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1
+
+ define void @foo() {
+diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+new file mode 100644
+index 000000000..fa5fccb1a
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+@@ -0,0 +1,33 @@
++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
++# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
++
++## Check that an fcc register clobbered by inline asm is correctly saved and
++## restored, by verifying that a pair of pseudos (PseudoST_CFR and
++## PseudoLD_CFR) is generated before and after the INLINEASM.
++...
++--- ++name: test ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ liveins: $f0_64, $f1_64 ++ ++ ; CHECK-LABEL: name: test ++ ; CHECK: liveins: $f0_64, $f1_64 ++ ; CHECK-NEXT: {{ $}} ++ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64 ++ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64 ++ ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]] ++ ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0) ++ ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 ++ ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) ++ ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]] ++ ; CHECK-NEXT: PseudoRET implicit killed $r4 ++ %1:fpr64 = COPY $f1_64 ++ %0:fpr64 = COPY $f0_64 ++ %2:cfr = FCMP_CLT_D %1, %0 ++ INLINEASM &"nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 ++ $r4 = COPY %2 ++ PseudoRET implicit killed $r4 ++ ++... +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll +new file mode 100644 +index 000000000..e3e23e46b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll +@@ -0,0 +1,47 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.csrrd.w(i32 immarg) nounwind ++declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) nounwind ++declare void @bug() ++ ++define dso_local void @foo(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: csrrd $a0, 2 ++; CHECK-NEXT: ori $a0, $a0, 1 ++; CHECK-NEXT: csrwr $a0, 2 ++; CHECK-NEXT: .LBB0_2: # %if.end ++; CHECK-NEXT: csrrd $a0, 2 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB0_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) ++ %or = or i32 %0, 1 ++ %1 = tail call i32 @llvm.loongarch.csrwr.w(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %2 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) ++ %and = and i32 %2, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll +index 882e7f693..a839ab149 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll +@@ -1,4 +1,3 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s + ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +@@ -13,140 +12,140 @@ declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) + declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) + + define void @dbar_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.dbar' out of range ++; CHECK: 
llvm.loongarch.dbar: argument out of range. + entry: + call void @llvm.loongarch.dbar(i32 32769) + ret void + } + + define void @dbar_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.dbar' out of range ++; CHECK: llvm.loongarch.dbar: argument out of range. + entry: + call void @llvm.loongarch.dbar(i32 -1) + ret void + } + + define void @ibar_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.ibar' out of range ++; CHECK: llvm.loongarch.ibar: argument out of range. + entry: + call void @llvm.loongarch.ibar(i32 32769) + ret void + } + + define void @ibar_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.ibar' out of range ++; CHECK: llvm.loongarch.ibar: argument out of range. + entry: + call void @llvm.loongarch.ibar(i32 -1) + ret void + } + + define void @break_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.break' out of range ++; CHECK: llvm.loongarch.break: argument out of range. + entry: + call void @llvm.loongarch.break(i32 32769) + ret void + } + + define void @break_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.break' out of range ++; CHECK: llvm.loongarch.break: argument out of range. + entry: + call void @llvm.loongarch.break(i32 -1) + ret void + } + + define void @movgr2fcsr(i32 %a) nounwind { +-; CHECK: llvm.loongarch.movgr2fcsr expects basic f target feature ++; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) + ret void + } + + define void @movgr2fcsr_imm_out_of_hi_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range ++; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 32, i32 %a) + ret void + } + + define void @movgr2fcsr_imm_out_of_lo_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range ++; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 -1, i32 %a) + ret void + } + + define i32 @movfcsr2gr() nounwind { +-; CHECK: llvm.loongarch.movfcsr2gr expects basic f target feature ++; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) + ret i32 %res + } + + define i32 @movfcsr2gr_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range ++; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 32) + ret i32 %res + } + + define i32 @movfcsr2gr_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range ++; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 -1) + ret i32 %res + } + + define void @syscall_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.syscall' out of range ++; CHECK: llvm.loongarch.syscall: argument out of range. + entry: + call void @llvm.loongarch.syscall(i32 32769) + ret void + } + + define void @syscall_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.syscall' out of range ++; CHECK: llvm.loongarch.syscall: argument out of range. + entry: + call void @llvm.loongarch.syscall(i32 -1) + ret void + } + + define i32 @csrrd_w_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range ++; CHECK: llvm.loongarch.csrrd.w: argument out of range. 
+ entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 16384) + ret i32 %0 + } + + define i32 @csrrd_w_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range ++; CHECK: llvm.loongarch.csrrd.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 -1) + ret i32 %0 + } + + define i32 @csrwr_w_imm_out_of_hi_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range ++; CHECK: llvm.loongarch.csrwr.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 16384) + ret i32 %0 + } + + define i32 @csrwr_w_imm_out_of_lo_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range ++; CHECK: llvm.loongarch.csrwr.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 -1) + ret i32 %0 + } + + define i32 @csrxchg_w_imm_out_of_hi_range(i32 %a, i32 %b) #0 { +-; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range ++; CHECK: llvm.loongarch.csrxchg.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 16384) + ret i32 %0 + } + + define i32 @csrxchg_w_imm_out_of_lo_range(i32 %a, i32 %b) #0 { +-; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range ++; CHECK: llvm.loongarch.csrxchg.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 -1) + ret i32 %0 +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll +new file mode 100644 +index 000000000..ad78f7f53 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll +@@ -0,0 +1,180 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.iocsrrd.b(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.b(i32, i32) nounwind ++declare i32 @llvm.loongarch.iocsrrd.h(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.h(i32, i32) nounwind ++declare i32 @llvm.loongarch.iocsrrd.w(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.w(i32, i32) nounwind ++declare i64 @llvm.loongarch.iocsrrd.d(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.d(i64, i32) nounwind ++declare void @bug() ++ ++define dso_local void @test_b(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.b $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.b $a1, $a0 ++; CHECK-NEXT: .LBB0_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.b $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB0_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.b(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ 
ret void ++} ++ ++define dso_local void @test_h(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.h $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.h $a1, $a0 ++; CHECK-NEXT: .LBB1_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.h $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB1_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB1_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.h(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} ++ ++define dso_local void @test_w(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.w $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.w $a1, $a0 ++; CHECK-NEXT: .LBB2_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.w $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB2_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB2_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.w(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} ++ ++define dso_local void @test_d(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.d $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.d $a1, $a0 ++; CHECK-NEXT: .LBB3_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.d $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB3_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB3_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) ++ %or = or i64 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.d(i64 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) ++ %and = 
and i64 %1, 1 ++ %tobool1.not = icmp eq i64 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll +index c91516149..5302ba558 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll +@@ -20,147 +20,147 @@ declare i64 @llvm.loongarch.lddir.d(i64, i64 immarg) + declare void @llvm.loongarch.ldpte.d(i64, i64 immarg) + + define void @cacop_arg0_out_of_hi_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 32, i32 %a, i32 1024) + ret void + } + + define void @cacop_arg0_out_of_lo_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 -1, i32 %a, i32 1024) + ret void + } + + define void @cacop_arg2_out_of_hi_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4096) + ret void + } + + define void @cacop_arg2_out_of_lo_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 -4096) + ret void + } + + define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.b.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.b.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.h.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.h.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.w.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.w.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.d.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.d.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.b.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.b.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.h.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.h.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.w.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.w.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 
%b) + ret i32 %res + } + + define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.d.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.d.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) + ret i32 %res + } + + define i64 @csrrd_d() { +-; CHECK: llvm.loongarch.csrrd.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrrd.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) + ret i64 %0 + } + + define i64 @csrwr_d(i64 %a) { +-; CHECK: llvm.loongarch.csrwr.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrwr.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) + ret i64 %0 + } + + define i64 @csrxchg_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.csrxchg.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrxchg.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) + ret i64 %0 + } + + define i64 @iocsrrd_d(i32 %a) { +-; CHECK: llvm.loongarch.iocsrrd.d requires target: loongarch64 ++; CHECK: llvm.loongarch.iocsrrd.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) + ret i64 %0 + } + + define void @iocsrwr_d(i64 %a, i32 signext %b) { +-; CHECK: llvm.loongarch.iocsrwr.d requires target: loongarch64 ++; CHECK: llvm.loongarch.iocsrwr.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.iocsrwr.d(i64 %a, i32 %b) + ret void + } + + define void @asrtle_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.asrtle.d requires target: loongarch64 ++; CHECK: llvm.loongarch.asrtle.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.asrtle.d(i64 %a, i64 %b) + ret void + } + + define void @asrtgt_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.asrtgt.d requires target: loongarch64 ++; CHECK: llvm.loongarch.asrtgt.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.asrtgt.d(i64 %a, i64 %b) + ret void + } + + define i64 @lddir_d(i64 %a) { +-; CHECK: llvm.loongarch.lddir.d requires target: loongarch64 ++; CHECK: llvm.loongarch.lddir.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) + ret i64 %0 + } + + define void @ldpte_d(i64 %a) { +-; CHECK: llvm.loongarch.ldpte.d requires target: loongarch64 ++; CHECK: llvm.loongarch.ldpte.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.ldpte.d(i64 %a, i64 1) + ret void +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll +index 51f6c4453..4716d401d 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll +@@ -8,76 +8,76 @@ declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) + declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) + + define i64 @csrrd_d_imm_out_of_hi_range() nounwind { +-; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range ++; CHECK: llvm.loongarch.csrrd.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 16384) + ret i64 %0 + } + + define i64 @csrrd_d_imm_out_of_lo_range() nounwind { +-; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range ++; CHECK: llvm.loongarch.csrrd.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 -1) + ret i64 %0 + } + + define i64 @csrwr_d_imm_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range ++; CHECK: llvm.loongarch.csrwr.d: 
argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 16384) + ret i64 %0 + } + + define i64 @csrwr_d_imm_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range ++; CHECK: llvm.loongarch.csrwr.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 -1) + ret i64 %0 + } + + define i64 @csrxchg_d_imm_out_of_hi_range(i64 %a, i64 %b) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range ++; CHECK: llvm.loongarch.csrxchg.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 16384) + ret i64 %0 + } + + define i64 @csrxchg_d_imm_out_of_lo_range(i64 %a, i64 %b) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range ++; CHECK: llvm.loongarch.csrxchg.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 -1) + ret i64 %0 + } + + define void @cacop_w(i32 %a) nounwind { +-; CHECK: llvm.loongarch.cacop.w requires target: loongarch32 ++; CHECK: llvm.loongarch.cacop.w: requires loongarch32 + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4) + ret void + } + + define void @cacop_arg0_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 32, i64 %a, i64 1024) + ret void + } + + define void @cacop_arg0_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 -1, i64 %a, i64 1024) + ret void + } + + define void @cacop_arg2_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 4096) + ret void + } + + define void @cacop_arg2_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 -4096) + ret void +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll +index 7b28682b5..f0ebd8508 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll +@@ -29,6 +29,14 @@ define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_b_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_b_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crc_w_h_w: + ; CHECK: # %bb.0: +@@ -38,6 +46,14 @@ define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_h_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_h_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crc_w_w_w: + ; CHECK: # %bb.0: +@@ -47,6 +63,14 @@ define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_w_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_w_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 
@llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) ++ ret void ++} ++ + define void @cacop_d(i64 %a) nounwind { + ; CHECK-LABEL: cacop_d: + ; CHECK: # %bb.0: +@@ -65,6 +89,14 @@ define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_d_w_noret(i64 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_d_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_b_w: + ; CHECK: # %bb.0: +@@ -74,6 +106,14 @@ define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_b_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_b_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_h_w: + ; CHECK: # %bb.0: +@@ -83,6 +123,14 @@ define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_h_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_h_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_w_w: + ; CHECK: # %bb.0: +@@ -92,6 +140,14 @@ define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_w_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_w_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_d_w: + ; CHECK: # %bb.0: +@@ -101,6 +157,14 @@ define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_d_w_noret(i64 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_d_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) ++ ret void ++} ++ + define i64 @csrrd_d() { + ; CHECK-LABEL: csrrd_d: + ; CHECK: # %bb.0: # %entry +@@ -111,6 +175,16 @@ entry: + ret i64 %0 + } + ++define void @csrrd_d_noret() { ++; CHECK-LABEL: csrrd_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrrd $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) ++ ret void ++} ++ + define i64 @csrwr_d(i64 %a) { + ; CHECK-LABEL: csrwr_d: + ; CHECK: # %bb.0: # %entry +@@ -121,6 +195,17 @@ entry: + ret i64 %0 + } + ++;; Check that csrwr is emitted even if the return value of the intrinsic is not used. ++define void @csrwr_d_noret(i64 %a) { ++; CHECK-LABEL: csrwr_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrwr $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) ++ ret void ++} ++ + define i64 @csrxchg_d(i64 %a, i64 %b) { + ; CHECK-LABEL: csrxchg_d: + ; CHECK: # %bb.0: # %entry +@@ -131,6 +216,17 @@ entry: + ret i64 %0 + } + ++;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
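++;; (The CSR write is a real side effect, so the instruction has to survive
++;; dead-code elimination even though its result, the old CSR value, is dead.)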
++define void @csrxchg_d_noret(i64 %a, i64 %b) { ++; CHECK-LABEL: csrxchg_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrxchg $a0, $a1, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) ++ ret void ++} ++ + define i64 @iocsrrd_d(i32 %a) { + ; CHECK-LABEL: iocsrrd_d: + ; CHECK: # %bb.0: # %entry +@@ -141,6 +237,16 @@ entry: + ret i64 %0 + } + ++define void @iocsrrd_d_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.d $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) ++ ret void ++} ++ + define void @iocsrwr_d(i64 %a, i32 signext %b) { + ; CHECK-LABEL: iocsrwr_d: + ; CHECK: # %bb.0: # %entry +@@ -181,6 +287,16 @@ entry: + ret i64 %0 + } + ++define void @lddir_d_noret(i64 %a) { ++; CHECK-LABEL: lddir_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lddir $a0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) ++ ret void ++} ++ + define void @ldpte_d(i64 %a) { + ; CHECK-LABEL: ldpte_d: + ; CHECK: # %bb.0: # %entry +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic.ll b/llvm/test/CodeGen/LoongArch/intrinsic.ll +index cfd54e17d..f49a2500a 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic.ll +@@ -69,6 +69,17 @@ entry: + ret i32 %res + } + ++;; TODO: Optimize out `movfcsr2gr` without data-dependency. ++define void @movfcsr2gr_noret() nounwind { ++; CHECK-LABEL: movfcsr2gr_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfcsr2gr $a0, $fcsr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) ++ ret void ++} ++ + define void @syscall() nounwind { + ; CHECK-LABEL: syscall: + ; CHECK: # %bb.0: # %entry +@@ -89,6 +100,16 @@ entry: + ret i32 %0 + } + ++define void @csrrd_w_noret() { ++; CHECK-LABEL: csrrd_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrrd $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1) ++ ret void ++} ++ + define i32 @csrwr_w(i32 signext %a) { + ; CHECK-LABEL: csrwr_w: + ; CHECK: # %bb.0: # %entry +@@ -99,6 +120,17 @@ entry: + ret i32 %0 + } + ++;; Check that csrwr is emitted even if the return value of the intrinsic is not used. ++define void @csrwr_w_noret(i32 signext %a) { ++; CHECK-LABEL: csrwr_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrwr $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 1) ++ ret void ++} ++ + define i32 @csrxchg_w(i32 signext %a, i32 signext %b) { + ; CHECK-LABEL: csrxchg_w: + ; CHECK: # %bb.0: # %entry +@@ -109,6 +141,17 @@ entry: + ret i32 %0 + } + ++;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
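++;; As in intrinsic-la64.ll, only the CSR side effect keeps the instruction
++;; alive; contrast cpucfg_noret at the end of this file, where the
++;; side-effect-free read is deleted entirely once its result goes unused.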
++define void @csrxchg_w_noret(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: csrxchg_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrxchg $a0, $a1, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 1) ++ ret void ++} ++ + define i32 @iocsrrd_b(i32 %a) { + ; CHECK-LABEL: iocsrrd_b: + ; CHECK: # %bb.0: # %entry +@@ -139,6 +182,36 @@ entry: + ret i32 %0 + } + ++define void @iocsrrd_b_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_b_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.b $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a) ++ ret void ++} ++ ++define void @iocsrrd_h_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_h_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.h $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a) ++ ret void ++} ++ ++define void @iocsrrd_w_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.w $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a) ++ ret void ++} ++ + define void @iocsrwr_b(i32 %a, i32 %b) { + ; CHECK-LABEL: iocsrwr_b: + ; CHECK: # %bb.0: # %entry +@@ -178,3 +251,12 @@ entry: + %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) + ret i32 %0 + } ++ ++define void @cpucfg_noret(i32 %a) { ++; CHECK-LABEL: cpucfg_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 01f96688f..3d7aa871b 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -315,10 +315,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -360,11 +357,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +@@ -411,10 +404,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -456,11 +446,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded 
Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) + ; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) + ; LA64D-NEXT: fld.d $fs0, $a0, 0 +@@ -507,10 +493,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -552,11 +535,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +@@ -604,10 +583,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -649,11 +625,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +diff --git a/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll +new file mode 100644 +index 000000000..12d4bfb50 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll +@@ -0,0 +1,63 @@ ++;; Test the function attribute "patchable-function-entry". ++;; Adapted from the RISCV test case. 
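++;; For "patchable-function-entry"="N", N nops are emitted at the function
++;; entry and the entry label is recorded in a __patchable_function_entries
++;; section entry (a .word on LA32, a .dword on LA64) so that tools can
++;; locate and patch the site at run time.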
++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=CHECK,LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=CHECK,LA64 ++ ++define void @f0() "patchable-function-entry"="0" { ++; CHECK-LABEL: f0: ++; CHECK-NEXT: .Lfunc_begin0: ++; CHECK-NOT: nop ++; CHECK: ret ++; CHECK-NOT: .section __patchable_function_entries ++ ret void ++} ++ ++define void @f1() "patchable-function-entry"="1" { ++; CHECK-LABEL: f1: ++; CHECK-NEXT: .Lfunc_begin1: ++; CHECK: nop ++; CHECK-NEXT: ret ++; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Lfunc_begin1 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Lfunc_begin1 ++ ret void ++} ++ ++$f5 = comdat any ++define void @f5() "patchable-function-entry"="5" comdat { ++; CHECK-LABEL: f5: ++; CHECK-NEXT: .Lfunc_begin2: ++; CHECK-COUNT-5: nop ++; CHECK-NEXT: ret ++; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Lfunc_begin2 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Lfunc_begin2 ++ ret void ++} ++ ++;; -fpatchable-function-entry=3,2 ++;; "patchable-function-prefix" emits data before the function entry label. ++define void @f3_2() "patchable-function-entry"="1" "patchable-function-prefix"="2" { ++; CHECK-LABEL: .type f3_2,@function ++; CHECK-NEXT: .Ltmp0: # @f3_2 ++; CHECK-COUNT-2: nop ++; CHECK-NEXT: f3_2: ++; CHECK: # %bb.0: ++; CHECK-NEXT: nop ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA64-NEXT: addi.d $sp, $sp, -16 ++;; .size does not include the prefix. ++; CHECK: .Lfunc_end3: ++; CHECK-NEXT: .size f3_2, .Lfunc_end3-f3_2 ++; CHECK: .section __patchable_function_entries,"awo",@progbits,f3_2{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Ltmp0 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Ltmp0 ++ %frame = alloca i8, i32 16 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll +index f09b49688..ff5476317 100644 +--- a/llvm/test/CodeGen/LoongArch/tail-calls.ll ++++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll +@@ -13,6 +13,7 @@ entry: + } + + ;; Perform tail call optimization for external symbol. ++;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns. + @dest = global [2 x i8] zeroinitializer + declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1) + define void @caller_extern(ptr %src) optsize { +@@ -21,10 +22,10 @@ define void @caller_extern(ptr %src) optsize { + ; CHECK-NEXT: move $a1, $a0 + ; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest) + ; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest) +-; CHECK-NEXT: ori $a2, $zero, 7 ++; CHECK-NEXT: ori $a2, $zero, 33 + ; CHECK-NEXT: b %plt(memcpy) + entry: +- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false) ++ tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false) + ret void + } + +diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +new file mode 100644 +index 000000000..1d5ed089c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +@@ -0,0 +1,74 @@ ++;; Check that an unknown --target-abi is ignored and the triple-implied ABI is ++;; used. 
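++;; The environment component of the triple (e.g. -gnusf) can itself imply
++;; an ABI; the cases below exercise how that interacts with an explicit
++;; --target-abi, including conflicting and bitness-mismatched combinations.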
++; RUN: llc --mtriple=loongarch32-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,UNKNOWN ++; RUN: llc --mtriple=loongarch64-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,UNKNOWN ++ ++; UNKNOWN: 'foo' is not a recognized ABI for this target, ignoring and using triple-implied ABI ++ ++;; Check that --target-abi takes precedence over triple-supplied ABI modifiers. ++; RUN: llc --mtriple=loongarch32-linux-gnusf --target-abi=ilp32d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,CONFLICT-ILP32D ++; RUN: llc --mtriple=loongarch64-linux-gnusf --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,CONFLICT-LP64D ++ ++; CONFLICT-ILP32D: warning: triple-implied ABI conflicts with provided target-abi 'ilp32d', using target-abi ++; CONFLICT-LP64D: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi ++ ++;; Check that no warning is reported when there is no environment component in ++;; triple-supplied ABI modifiers and --target-abi is used. ++; RUN: llc --mtriple=loongarch64-linux --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,NO-WARNING ++ ++; NO-WARNING-NOT: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi ++ ++;; Check that ILP32-on-LA64 and LP64-on-LA32 combinations are handled properly. ++; RUN: llc --mtriple=loongarch64 --target-abi=ilp32d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,32ON64 ++; RUN: llc --mtriple=loongarch32 --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,64ON32 ++ ++; 32ON64: 32-bit ABIs are not supported for 64-bit targets, ignoring target-abi and using triple-implied ABI ++; 64ON32: 64-bit ABIs are not supported for 32-bit targets, ignoring target-abi and using triple-implied ABI ++ ++define float @f(float %a) { ++; ILP32D-LABEL: f: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: f: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.w $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.w $fa1, $a0 ++; LP64D-NEXT: ffint.s.w $fa1, $fa1 ++; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd float %a, 1.0 ++ ret float %1 ++} ++ ++define double @g(double %a) { ++; ILP32D-LABEL: g: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 ++; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: g: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.d $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.d $fa1, $a0 ++; LP64D-NEXT: ffint.d.l $fa1, $fa1 ++; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd double %a, 1.0 ++ ret double %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +new file mode 100644 +index 000000000..0aca33903 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +@@ -0,0 +1,49 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++ ++;; Check that the correct ABI is chosen based on the triple given. ++;; TODO: enable the S and F ABIs once support is wired up. 
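++;; The gnuf64 environment implies the double-float ABIs checked below:
++;; ilp32d for loongarch32 and lp64d for loongarch64. gnuf32 and gnusf
++;; would analogously select the single-float and soft-float ABIs.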
++; RUN: llc --mtriple=loongarch32-linux-gnuf64 --mattr=+d < %s \ ++; RUN: | FileCheck %s --check-prefix=ILP32D ++; RUN: llc --mtriple=loongarch64-linux-gnuf64 --mattr=+d < %s \ ++; RUN: | FileCheck %s --check-prefix=LP64D ++ ++define float @f(float %a) { ++; ILP32D-LABEL: f: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: f: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.w $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.w $fa1, $a0 ++; LP64D-NEXT: ffint.s.w $fa1, $fa1 ++; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd float %a, 1.0 ++ ret float %1 ++} ++ ++define double @g(double %a) { ++; ILP32D-LABEL: g: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 ++; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: g: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.d $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.d $fa1, $a0 ++; LP64D-NEXT: ffint.d.l $fa1, $fa1 ++; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd double %a, 1.0 ++ ret double %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll +new file mode 100644 +index 000000000..871c17f06 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll +@@ -0,0 +1,72 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++ ++;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll. ++ ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED ++; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED ++; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED ++ ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED ++; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED ++; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED ++ ++define i32 @f0(ptr %p) nounwind { ++; LA32-ALIGNED-LABEL: f0: ++; LA32-ALIGNED: # %bb.0: ++; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0 ++; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2 ++; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16 ++; LA32-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA32-ALIGNED-NEXT: ret ++; ++; LA32-UNALIGNED-LABEL: f0: ++; LA32-UNALIGNED: # %bb.0: ++; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0 ++; LA32-UNALIGNED-NEXT: ret ++; ++; LA64-UNALIGNED-LABEL: f0: ++; LA64-UNALIGNED: # %bb.0: ++; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0 ++; LA64-UNALIGNED-NEXT: ret ++; ++; LA64-ALIGNED-LABEL: f0: ++; LA64-ALIGNED: # %bb.0: ++; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0 ++; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2 ++; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16 ++; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA64-ALIGNED-NEXT: ret ++ %tmp = load i32, ptr %p, align 2 ++ ret i32 %tmp ++} ++ ++define i64 @f1(ptr %p) nounwind { ++; LA32-ALIGNED-LABEL: f1: ++; LA32-ALIGNED: # %bb.0: ++; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0 ++; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4 ++; LA32-ALIGNED-NEXT: move $a0, $a2 ++; LA32-ALIGNED-NEXT: ret ++; ++; LA32-UNALIGNED-LABEL: f1: ++; LA32-UNALIGNED: # %bb.0: ++; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0 ++; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4 ++; LA32-UNALIGNED-NEXT: 
move $a0, $a2 ++; LA32-UNALIGNED-NEXT: ret ++; ++; LA64-UNALIGNED-LABEL: f1: ++; LA64-UNALIGNED: # %bb.0: ++; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0 ++; LA64-UNALIGNED-NEXT: ret ++; ++; LA64-ALIGNED-LABEL: f1: ++; LA64-ALIGNED: # %bb.0: ++; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0 ++; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4 ++; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32 ++; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA64-ALIGNED-NEXT: ret ++ %tmp = load i64, ptr %p, align 4 ++ ret i64 %tmp ++} +diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll +new file mode 100644 +index 000000000..37afe7e3e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++ ++;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll. ++ ++; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64 ++ ++;; Small (16 bytes here) unaligned memcpy() should be a function call if ++;; ual is turned off. ++define void @t0(ptr %out, ptr %in) { ++; LA32-LABEL: t0: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: ori $a2, $zero, 16 ++; LA32-NEXT: bl %plt(memcpy) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t0: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: addi.d $sp, $sp, -16 ++; LA64-NEXT: .cfi_def_cfa_offset 16 ++; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; LA64-NEXT: .cfi_offset 1, -8 ++; LA64-NEXT: ori $a2, $zero, 16 ++; LA64-NEXT: bl %plt(memcpy) ++; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; LA64-NEXT: addi.d $sp, $sp, 16 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false) ++ ret void ++} ++ ++;; Small (16 bytes here) aligned memcpy() should be inlined even if ++;; ual is turned off. ++define void @t1(ptr align 8 %out, ptr align 8 %in) { ++; LA32-LABEL: t1: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: ld.w $a2, $a1, 12 ++; LA32-NEXT: st.w $a2, $a0, 12 ++; LA32-NEXT: ld.w $a2, $a1, 8 ++; LA32-NEXT: st.w $a2, $a0, 8 ++; LA32-NEXT: ld.w $a2, $a1, 4 ++; LA32-NEXT: st.w $a2, $a0, 4 ++; LA32-NEXT: ld.w $a1, $a1, 0 ++; LA32-NEXT: st.w $a1, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t1: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: ld.d $a2, $a1, 8 ++; LA64-NEXT: st.d $a2, $a0, 8 ++; LA64-NEXT: ld.d $a1, $a1, 0 ++; LA64-NEXT: st.d $a1, $a0, 0 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false) ++ ret void ++} ++ ++;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized ++;; loads and stores if ual is turned off. 
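++;; (Byte accesses are always naturally aligned, so this inlining stays
++;; legal without ual; the only cost is more instructions.)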
++define void @t2(ptr %out, ptr %in) { ++; LA32-LABEL: t2: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: ld.b $a2, $a1, 3 ++; LA32-NEXT: st.b $a2, $a0, 3 ++; LA32-NEXT: ld.b $a2, $a1, 2 ++; LA32-NEXT: st.b $a2, $a0, 2 ++; LA32-NEXT: ld.b $a2, $a1, 1 ++; LA32-NEXT: st.b $a2, $a0, 1 ++; LA32-NEXT: ld.b $a1, $a1, 0 ++; LA32-NEXT: st.b $a1, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t2: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: ld.b $a2, $a1, 3 ++; LA64-NEXT: st.b $a2, $a0, 3 ++; LA64-NEXT: ld.b $a2, $a1, 2 ++; LA64-NEXT: st.b $a2, $a0, 2 ++; LA64-NEXT: ld.b $a2, $a1, 1 ++; LA64-NEXT: st.b $a2, $a0, 1 ++; LA64-NEXT: ld.b $a1, $a1, 0 ++; LA64-NEXT: st.b $a1, $a0, 0 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false) ++ ret void ++} ++ ++declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +new file mode 100644 +index 000000000..8ff055f13 +--- /dev/null ++++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +@@ -0,0 +1,39 @@ ++; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s ++ ++;; Check this won't result in crash. ++define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { ++ %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) ++ %6 = add nsw <8 x i32> %1, ++ call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 ++ %7 = bitcast i8 %2 to <8 x i1> ++ %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 ++ %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) ++ %10 = bitcast i8 %3 to <8 x i1> ++ %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> ++ ret <8 x i32> %11 ++} ++ ++declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) ++declare void @llvm.dbg.value(metadata, metadata, metadata) ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) ++!1 = !DIFile(filename: "a.cpp", directory: "/") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) ++!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) ++!6 = !DINamespace(name: "ns1", scope: null) ++!7 = !DISubroutineType(types: !8) ++!8 = !{null} ++!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) ++!10 = !{!4} ++!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) ++!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) ++!13 = !{!14} ++!14 = !DISubrange(count: 4) ++!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) ++!16 = !DILocation(line: 18, scope: !17) ++!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +diff --git a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll +index 4b603cd29..51a5905fe 100644 +--- a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll ++++ 
b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support emulated tls. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: not lli -no-process-syms -emulated-tls -jit-kind=orc-lazy %s 2>&1 \ + ; RUN: | FileCheck %s + ; +diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +index 4161b4f3c..3a3d23f2b 100644 +--- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +@@ -1,6 +1,8 @@ + import sys + +-if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', 'mips', 'mipsel', 'mips64', 'mips64el']: ++if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', ++ 'mips', 'mipsel', 'mips64', 'mips64el', ++ 'loongarch64']: + config.unsupported = True + + # FIXME: These tests don't pass with the COFF rtld. +diff --git a/llvm/test/ExecutionEngine/frem.ll b/llvm/test/ExecutionEngine/frem.ll +index b8739c249..d33e4fca8 100644 +--- a/llvm/test/ExecutionEngine/frem.ll ++++ b/llvm/test/ExecutionEngine/frem.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; LLI.exe used to crash on Windows\X86 when certain single precession + ; floating point intrinsics (defined as macros) are used. + ; This unit test guards against the failure. +diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg +index e71e7cf3c..b00ef0dcb 100644 +--- a/llvm/test/ExecutionEngine/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/lit.local.cfg +@@ -1,4 +1,4 @@ +-if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV', 'LoongArch']: ++if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV']: + config.unsupported = True + + # ExecutionEngine tests are not expected to pass in a cross-compilation setup. +diff --git a/llvm/test/ExecutionEngine/mov64zext32.ll b/llvm/test/ExecutionEngine/mov64zext32.ll +index bba1a1987..43bd0fb2f 100644 +--- a/llvm/test/ExecutionEngine/mov64zext32.ll ++++ b/llvm/test/ExecutionEngine/mov64zext32.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll +index 6f784265a..99d95791c 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll +index 6896af83c..2e5592d4d 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. 
++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll +index f654120ea..1e11659b1 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll +index 84bdec1cf..e919550de 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll +index 5a20fc4f1..9862d6af1 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +new file mode 100644 +index 000000000..8a4ab5958 +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +@@ -0,0 +1,78 @@ ++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++;; First, check allocation of the save area. ++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 ++declare void @llvm.va_start(ptr) #2 ++declare void @llvm.va_end(ptr) #2 ++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 ++define i32 @foo(i32 %guard, ...) { ++; CHECK-LABEL: @foo ++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls ++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] ++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] ++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) ++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) ++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) ++; ++ %vl = alloca ptr, align 8 ++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) ++ call void @llvm.va_start(ptr %vl) ++ call void @llvm.va_end(ptr %vl) ++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) ++ ret i32 0 ++} ++ ++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ++;; array. ++define i32 @bar() { ++; CHECK-LABEL: @bar ++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 ++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, ...) 
@foo(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Check multiple fixed arguments. ++declare i32 @foo2(i32 %g1, i32 %g2, ...) ++define i32 @bar2() { ++; CHECK-LABEL: @bar2 ++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ++;; passed to a variadic function. ++declare i64 @sum(i64 %n, ...) ++define dso_local i64 @many_args() { ++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. ++; CHECK-LABEL: @many_args ++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) ++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) ++; ++entry: ++ %ret = call i64 (i64, ...) @sum(i64 120, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 ++ ) ++ ret i64 %ret ++} +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll +new file mode 100644 +index 000000000..dcbe2a242 +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll +@@ -0,0 +1,14 @@ ++; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 ++; Test that code using va_start can be compiled on LoongArch. ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++define void @VaStart(ptr %s, ...) { ++entry: ++ %vl = alloca ptr, align 4 ++ call void @llvm.va_start(ptr %vl) ++ ret void ++} ++ ++declare void @llvm.va_start(ptr) +diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +new file mode 100644 +index 000000000..0179e1027 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +@@ -0,0 +1,28 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s ++ ++## Check that subtraction expressions emit R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. ++ ++## TODO: 1- or 2-byte data relocations are not supported for now. ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.data { ++# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 ++# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 ++# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 ++# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 ++# CHECK-NEXT: } ++ ++.section sx,"a" ++x: ++nop ++ ++.data ++.8byte x-. ++.8byte y-. 
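++## The 4-byte differences below should produce the R_LARCH_32_PCREL pairs
++## checked above, mirroring R_LARCH_64_PCREL for the 8-byte data.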
++.4byte x-. ++.4byte y-. ++ ++.section sy,"a" ++y: ++nop +diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +index 78fc14355..fc5856691 100644 +--- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test ++++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +@@ -42,6 +42,12 @@ + # RUN: llvm-objcopy -I binary -O elf32-hexagon %t.txt %t.hexagon.o + # RUN: llvm-readobj --file-headers %t.hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32 + ++# RUN: llvm-objcopy -I binary -O elf32-loongarch %t.txt %t.la32.o ++# RUN: llvm-readobj --file-headers %t.la32.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32 ++ ++# RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o ++# RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64 ++ + # CHECK: Format: + # 32-SAME: elf32- + # 64-SAME: elf64- +@@ -49,6 +55,8 @@ + # ARM-SAME: littlearm + # HEXAGON-SAME: hexagon + # I386-SAME: i386 ++# LA32-SAME: loongarch{{$}} ++# LA64-SAME: loongarch{{$}} + # MIPS-SAME: mips{{$}} + # RISCV32-SAME: riscv{{$}} + # RISCV64-SAME: riscv{{$}} +@@ -62,6 +70,8 @@ + # ARM-NEXT: Arch: arm + # HEXAGON-NEXT: Arch: hexagon + # I386-NEXT: Arch: i386 ++# LA32-NEXT: Arch: loongarch32 ++# LA64-NEXT: Arch: loongarch64 + # MIPS-NEXT: Arch: mips{{$}} + # PPC32BE-NEXT: Arch: powerpc{{$}} + # PPC32LE-NEXT: Arch: powerpcle{{$}} +@@ -97,6 +107,8 @@ + # ARM-NEXT: Machine: EM_ARM (0x28) + # HEXAGON-NEXT: Machine: EM_HEXAGON (0xA4) + # I386-NEXT: Machine: EM_386 (0x3) ++# LA32-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA64-NEXT: Machine: EM_LOONGARCH (0x102) + # MIPS-NEXT: Machine: EM_MIPS (0x8) + # PPC32-NEXT: Machine: EM_PPC (0x14) + # PPC64-NEXT: Machine: EM_PPC64 (0x15) +diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +index 98f1b3c64..882940c05 100644 +--- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test ++++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +@@ -109,6 +109,14 @@ + # RUN: llvm-readobj --file-headers %t.elf32_hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV + # RUN: llvm-readobj --file-headers %t.elf32_hexagon.dwo | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV + ++# RUN: llvm-objcopy %t.o -O elf32-loongarch %t.elf32_loongarch.o --split-dwo=%t.elf32_loongarch.dwo ++# RUN: llvm-readobj --file-headers %t.elf32_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV ++# RUN: llvm-readobj --file-headers %t.elf32_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV ++ ++# RUN: llvm-objcopy %t.o -O elf64-loongarch %t.elf64_loongarch.o --split-dwo=%t.elf64_loongarch.dwo ++# RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV ++# RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV ++ + !ELF + FileHeader: + Class: ELFCLASS32 +@@ -144,6 +152,8 @@ Symbols: + # AARCH-SAME: aarch64 + # ARM-SAME: littlearm + # HEXAGON-SAME: hexagon ++# LA32-SAME: loongarch{{$}} ++# LA64-SAME: loongarch{{$}} + # MIPS-SAME: mips + # PPCBE-SAME: powerpc{{$}} + # PPCLE-SAME: powerpcle{{$}} +@@ -158,6 +168,8 @@ Symbols: + # AARCH-NEXT: Arch: aarch64 + # ARM-NEXT: Arch: arm + # HEXAGON-NEXT: Arch: hexagon ++# LA32-NEXT: Arch: loongarch32 ++# LA64-NEXT: Arch: loongarch64 + # MIPSBE-NEXT: Arch: mips{{$}} + # MIPSLE-NEXT: Arch: mipsel{{$}} + # 
MIPS64BE-NEXT: Arch: mips64{{$}} +@@ -190,6 +202,8 @@ Symbols: + # HEXAGON: Machine: EM_HEXAGON (0xA4) + # I386: Machine: EM_386 (0x3) + # IAMCU: Machine: EM_IAMCU (0x6) ++# LA32: Machine: EM_LOONGARCH (0x102) ++# LA64: Machine: EM_LOONGARCH (0x102) + # MIPS: Machine: EM_MIPS (0x8) + # PPC32: Machine: EM_PPC (0x14) + # PPC64: Machine: EM_PPC64 (0x15) +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index c26fae7e8..e32dc893f 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -93,6 +93,15 @@ + # CHECK: Type: R_LARCH_TLS_GD_HI20 (98) + # CHECK: Type: R_LARCH_32_PCREL (99) + # CHECK: Type: R_LARCH_RELAX (100) ++# CHECK: Type: R_LARCH_DELETE (101) ++# CHECK: Type: R_LARCH_ALIGN (102) ++# CHECK: Type: R_LARCH_PCREL20_S2 (103) ++# CHECK: Type: R_LARCH_CFA (104) ++# CHECK: Type: R_LARCH_ADD6 (105) ++# CHECK: Type: R_LARCH_SUB6 (106) ++# CHECK: Type: R_LARCH_ADD_ULEB128 (107) ++# CHECK: Type: R_LARCH_SUB_ULEB128 (108) ++# CHECK: Type: R_LARCH_64_PCREL (109) + + --- !ELF + FileHeader: +@@ -193,3 +202,12 @@ Sections: + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX ++ - Type: R_LARCH_DELETE ++ - Type: R_LARCH_ALIGN ++ - Type: R_LARCH_PCREL20_S2 ++ - Type: R_LARCH_CFA ++ - Type: R_LARCH_ADD6 ++ - Type: R_LARCH_SUB6 ++ - Type: R_LARCH_ADD_ULEB128 ++ - Type: R_LARCH_SUB_ULEB128 ++ - Type: R_LARCH_64_PCREL +diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +index 577b83732..42bd8371d 100644 +--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp ++++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +@@ -331,7 +331,11 @@ static const StringMap TargetMap{ + // SPARC + {"elf32-sparc", {ELF::EM_SPARC, false, false}}, + {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, ++ // Hexagon + {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, ++ // LoongArch ++ {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, ++ {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, + }; + + static Expected +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 9cf8feb0e..35fc2ec69 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -233,6 +233,24 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); + EXPECT_EQ("R_LARCH_RELAX", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); ++ EXPECT_EQ("R_LARCH_DELETE", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); ++ EXPECT_EQ("R_LARCH_ALIGN", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); ++ EXPECT_EQ("R_LARCH_PCREL20_S2", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); ++ EXPECT_EQ("R_LARCH_CFA", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); ++ EXPECT_EQ("R_LARCH_ADD6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); ++ EXPECT_EQ("R_LARCH_SUB6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); ++ EXPECT_EQ("R_LARCH_ADD_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); ++ EXPECT_EQ("R_LARCH_SUB_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); ++ EXPECT_EQ("R_LARCH_64_PCREL", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { +diff --git a/llvm/unittests/TargetParser/TripleTest.cpp 
b/llvm/unittests/TargetParser/TripleTest.cpp +index 77de43a16..b19699fc0 100644 +--- a/llvm/unittests/TargetParser/TripleTest.cpp ++++ b/llvm/unittests/TargetParser/TripleTest.cpp +@@ -1225,12 +1225,14 @@ TEST(TripleTest, BitWidthPredicates) { + EXPECT_TRUE(T.isArch32Bit()); + EXPECT_FALSE(T.isArch64Bit()); + EXPECT_TRUE(T.isLoongArch()); ++ EXPECT_TRUE(T.isLoongArch32()); + + T.setArch(Triple::loongarch64); + EXPECT_FALSE(T.isArch16Bit()); + EXPECT_FALSE(T.isArch32Bit()); + EXPECT_TRUE(T.isArch64Bit()); + EXPECT_TRUE(T.isLoongArch()); ++ EXPECT_TRUE(T.isLoongArch64()); + + T.setArch(Triple::dxil); + EXPECT_FALSE(T.isArch16Bit()); diff --git a/llvm.spec b/llvm.spec index d1913ac638d56a0df0d22601f4a6643a6d2073a4..d810e2f9e30bee42b602e7215da5edf275a47c66 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,10 +1,10 @@ -%define anolis_release 1 +%define anolis_release 2 %global toolchain clang %undefine _include_frame_pointers -%define gold_arches x86_64 aarch64 loongarch64 +%define gold_arches x86_64 aarch64 loongarch64 %ifarch %{gold_arches} %bcond_without gold %else @@ -64,6 +64,7 @@ Source2: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ve Source4: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ver}.%{min_ver}.%{patch_ver}/%{third_party_srcdir}.tar.xz Patch2: 0001-llvm-Add-install-targets-for-gtest.patch +Patch3: 0001-backport-LoongArch-patches.patch Patch201: 0201-third-party-Add-install-targets-for-gtest.patch BuildRequires: gcc gcc-c++ clang cmake ninja-build zlib-devel libffi-devel @@ -484,6 +485,9 @@ fi %endif %changelog +* Thu Aug 17 2023 Chen Li - 16.0.6-2 +- Backport LoongArch patches from llvmorg-17.0.0-rc2 + * Sun Jul 9 2023 Funda Wang - 16.0.6-1 - New version 16.0.6