diff --git a/0001-backport-LoongArch-patches.patch b/0001-backport-LoongArch-patches.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7328bc0602b18060a1faf2218cc7316caa187778
--- /dev/null
+++ b/0001-backport-LoongArch-patches.patch
@@ -0,0 +1,4939 @@
+diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
+index 174818417..22067d184 100644
+--- a/llvm/docs/ReleaseNotes.rst
++++ b/llvm/docs/ReleaseNotes.rst
+@@ -186,6 +186,8 @@ Changes to the LoongArch Backend
+ * Initial JITLink support is added.
+ (`D141036 <https://reviews.llvm.org/D141036>`_)
+
++* The `lp64s` ABI is supported now and has been tested on Rust bare-metal target.
++
+ Changes to the MIPS Backend
+ ---------------------------
+
+diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+index 67dbd0201..02bce3c71 100644
+--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97)
+ ELF_RELOC(R_LARCH_TLS_GD_HI20, 98)
+ ELF_RELOC(R_LARCH_32_PCREL, 99)
+ ELF_RELOC(R_LARCH_RELAX, 100)
++
++// Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the
++// v2.10 LoongArch ABI specs.
++//
++// Spec addition: https://github.com/loongson/la-abi-specs/pull/1
++// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138
++ELF_RELOC(R_LARCH_DELETE, 101)
++ELF_RELOC(R_LARCH_ALIGN, 102)
++ELF_RELOC(R_LARCH_PCREL20_S2, 103)
++ELF_RELOC(R_LARCH_CFA, 104)
++ELF_RELOC(R_LARCH_ADD6, 105)
++ELF_RELOC(R_LARCH_SUB6, 106)
++ELF_RELOC(R_LARCH_ADD_ULEB128, 107)
++ELF_RELOC(R_LARCH_SUB_ULEB128, 108)
++ELF_RELOC(R_LARCH_64_PCREL, 109)
+diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+index 4ebdcc012..b20d12495 100644
+--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+@@ -2,8 +2,6 @@
+ #define LOONGARCH_FEATURE(NAME, KIND)
+ #endif
+
+-LOONGARCH_FEATURE("invalid", FK_INVALID)
+-LOONGARCH_FEATURE("none", FK_NONE)
+ LOONGARCH_FEATURE("+64bit", FK_64BIT)
+ LOONGARCH_FEATURE("+f", FK_FP32)
+ LOONGARCH_FEATURE("+d", FK_FP64)
+@@ -11,6 +9,7 @@ LOONGARCH_FEATURE("+lsx", FK_LSX)
+ LOONGARCH_FEATURE("+lasx", FK_LASX)
+ LOONGARCH_FEATURE("+lbt", FK_LBT)
+ LOONGARCH_FEATURE("+lvz", FK_LVZ)
++LOONGARCH_FEATURE("+ual", FK_UAL)
+
+ #undef LOONGARCH_FEATURE
+
+@@ -18,8 +17,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ)
+ #define LOONGARCH_ARCH(NAME, KIND, FEATURES)
+ #endif
+
+-LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID)
+-LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64)
+-LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX)
++LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
++LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
+
+ #undef LOONGARCH_ARCH
+diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+index 53f9073e4..028844187 100644
+--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
++++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+@@ -23,9 +23,6 @@ class StringRef;
+ namespace LoongArch {
+
+ enum FeatureKind : uint32_t {
+- FK_INVALID = 0,
+- FK_NONE = 1,
+-
+ // 64-bit ISA is available.
+ FK_64BIT = 1 << 1,
+
+@@ -46,6 +43,9 @@ enum FeatureKind : uint32_t {
+
+ // Loongson Virtualization Extension is available.
+ FK_LVZ = 1 << 7,
++
++ // Allow memory accesses to be unaligned.
++ FK_UAL = 1 << 8,
+ };
+
+ struct FeatureInfo {
+@@ -64,11 +64,14 @@ struct ArchInfo {
+ uint32_t Features;
+ };
+
+-ArchKind parseArch(StringRef Arch);
++bool isValidArchName(StringRef Arch);
+ bool getArchFeatures(StringRef Arch, std::vector<StringRef> &Features);
++bool isValidCPUName(StringRef TuneCPU);
++void fillValidCPUList(SmallVectorImpl<StringRef> &Values);
++StringRef getDefaultArch(bool Is64Bit);
+
+ } // namespace LoongArch
+
+ } // namespace llvm
+
+-#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H
++#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H
+diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
+index 59513fa2f..5ddb1d314 100644
+--- a/llvm/include/llvm/TargetParser/Triple.h
++++ b/llvm/include/llvm/TargetParser/Triple.h
+@@ -846,10 +846,14 @@ public:
+ : PointerWidth == 64;
+ }
+
++ /// Tests whether the target is 32-bit LoongArch.
++ bool isLoongArch32() const { return getArch() == Triple::loongarch32; }
++
++ /// Tests whether the target is 64-bit LoongArch.
++ bool isLoongArch64() const { return getArch() == Triple::loongarch64; }
++
+ /// Tests whether the target is LoongArch (32- and 64-bit).
+- bool isLoongArch() const {
+- return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64;
+- }
++ bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); }
+
+ /// Tests whether the target is MIPS 32-bit (little and big endian).
+ bool isMIPS32() const {
+diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+index 9a3609bc1..dc5c443ea 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+@@ -10247,8 +10247,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+- if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+- isConstantIntBuildVectorOrConstantInt(N1)) {
++ if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+index bc84988e3..e06dea9d5 100644
+--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
++++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+@@ -802,7 +802,8 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
+
+ if (S.JTMB->getTargetTriple().isOSBinFormatELF() &&
+ (S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64 ||
+- S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le))
++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le ||
++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::loongarch64))
+ Layer->setAutoClaimResponsibilityForObjectSymbols(true);
+
+ // FIXME: Explicit conversion to std::unique_ptr<ObjectLayer> added to silence
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+index a9aaff424..b154ea287 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+@@ -987,6 +987,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
+ // and stubs for branches Thumb - ARM and ARM - Thumb.
+ writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4]
+ return Addr + 4;
++ } else if (Arch == Triple::loongarch64) {
++ // lu12i.w $t0, %abs_hi20(addr)
++ // ori $t0, $t0, %abs_lo12(addr)
++ // lu32i.d $t0, %abs64_lo20(addr)
++ // lu52i.d $t0, $t0, %abs64_hi12(addr)
++ // jr $t0
++ writeBytesUnaligned(0x1400000c, Addr, 4);
++ writeBytesUnaligned(0x0380018c, Addr + 4, 4);
++ writeBytesUnaligned(0x1600000c, Addr + 8, 4);
++ writeBytesUnaligned(0x0300018c, Addr + 12, 4);
++ writeBytesUnaligned(0x4c000180, Addr + 16, 4);
++ return Addr;
+ } else if (IsMipsO32ABI || IsMipsN32ABI) {
+ // 0: 3c190000 lui t9,%hi(addr).
+ // 4: 27390000 addiu t9,t9,%lo(addr).
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+index 2fe49fefa..f85452bee 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+@@ -641,6 +641,102 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
+ }
+ }
+
++// Returns extract bits Val[Hi:Lo].
++static inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) {
++ return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo;
++}
++
++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section,
++ uint64_t Offset,
++ uint64_t Value, uint32_t Type,
++ int64_t Addend) {
++ uint32_t *TargetPtr =
++ reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
++
++ LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x"
++ << format("%llx", Section.getAddressWithOffset(Offset))
++ << " FinalAddress: 0x" << format("%llx", FinalAddress)
++ << " Value: 0x" << format("%llx", Value) << " Type: 0x"
++ << format("%x", Type) << " Addend: 0x"
++ << format("%llx", Addend) << "\n");
++
++ switch (Type) {
++ default:
++ report_fatal_error("Relocation type not implemented yet!");
++ break;
++ case ELF::R_LARCH_32:
++ *(support::little32_t *)TargetPtr = static_cast<uint32_t>(Value + Addend);
++ break;
++ case ELF::R_LARCH_64:
++ *(support::little64_t *)TargetPtr = Value + Addend;
++ break;
++ case ELF::R_LARCH_32_PCREL:
++ *(support::little32_t *)TargetPtr =
++ static_cast<uint32_t>(Value - FinalAddress + Addend);
++ break;
++ case ELF::R_LARCH_B26: {
++ uint64_t BranchImm = Value - FinalAddress + Addend;
++ assert(isInt<28>(BranchImm));
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm = static_cast<uint32_t>(BranchImm >> 2);
++ uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10;
++ uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16);
++ *(support::little32_t *)TargetPtr = RawInstr | Imm15_0 | Imm25_16;
++ break;
++ }
++ case ELF::R_LARCH_GOT_PC_HI20:
++ case ELF::R_LARCH_PCALA_HI20: {
++ uint64_t Target = Value + Addend;
++ uint64_t TargetPage =
++ (Target + (Target & 0x800)) & ~static_cast<uint64_t>(0xfff);
++ uint64_t PCPage = FinalAddress & ~static_cast<uint64_t>(0xfff);
++ int64_t PageDelta = TargetPage - PCPage;
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5;
++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12;
++ break;
++ }
++ case ELF::R_LARCH_GOT_PC_LO12:
++ case ELF::R_LARCH_PCALA_LO12: {
++ // TODO: code-model=medium
++ uint64_t TargetOffset = (Value + Addend) & 0xfff;
++ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
++ uint32_t Imm11_0 = TargetOffset << 10;
++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0;
++ 
break; ++ } ++ case ELF::R_LARCH_ABS_HI20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_ABS_LO12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_LO20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm51_32 = extractBits(Target >> 32, /*Hi=*/19, /*Lo=*/0) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm51_32; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_HI12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm63_52 = extractBits(Target >> 32, /*Hi=*/31, /*Lo=*/20) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm63_52; ++ break; ++ } ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1057,6 +1153,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1209,6 +1308,81 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, + } + } + ++bool RuntimeDyldELF::resolveLoongArch64ShortBranch( ++ unsigned SectionID, relocation_iterator RelI, ++ const RelocationValueRef &Value) { ++ uint64_t Address; ++ if (Value.SymbolName) { ++ auto Loc = GlobalSymbolTable.find(Value.SymbolName); ++ // Don't create direct branch for external symbols. ++ if (Loc == GlobalSymbolTable.end()) ++ return false; ++ const auto &SymInfo = Loc->second; ++ Address = ++ uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( ++ SymInfo.getOffset())); ++ } else { ++ Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); ++ } ++ uint64_t Offset = RelI->getOffset(); ++ uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); ++ if (!isInt<28>(Address + Value.Addend - SourceAddress)) ++ return false; ++ resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), ++ Value.Addend); ++ return true; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, ++ StubMap &Stubs) { ++ LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); ++ SectionEntry &Section = Sections[SectionID]; ++ uint64_t Offset = RelI->getOffset(); ++ unsigned RelType = RelI->getType(); ++ // Look for an existing stub. ++ StubMap::const_iterator i = Stubs.find(Value); ++ if (i != Stubs.end()) { ++ resolveRelocation(Section, Offset, ++ (uint64_t)Section.getAddressWithOffset(i->second), ++ RelType, 0); ++ LLVM_DEBUG(dbgs() << " Stub function found\n"); ++ } else if (!resolveLoongArch64ShortBranch(SectionID, RelI, Value)) { ++ // Create a new stub function. 
++ LLVM_DEBUG(dbgs() << " Create a new stub function\n");
++ Stubs[Value] = Section.getStubOffset();
++ uint8_t *StubTargetAddr = createStubFunction(
++ Section.getAddressWithOffset(Section.getStubOffset()));
++ RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(),
++ ELF::R_LARCH_ABS_HI20, Value.Addend);
++ RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4,
++ ELF::R_LARCH_ABS_LO12, Value.Addend);
++ RelocationEntry LU32I_D(SectionID,
++ StubTargetAddr - Section.getAddress() + 8,
++ ELF::R_LARCH_ABS64_LO20, Value.Addend);
++ RelocationEntry LU52I_D(SectionID,
++ StubTargetAddr - Section.getAddress() + 12,
++ ELF::R_LARCH_ABS64_HI12, Value.Addend);
++ if (Value.SymbolName) {
++ addRelocationForSymbol(LU12I_W, Value.SymbolName);
++ addRelocationForSymbol(ORI, Value.SymbolName);
++ addRelocationForSymbol(LU32I_D, Value.SymbolName);
++ addRelocationForSymbol(LU52I_D, Value.SymbolName);
++ } else {
++ addRelocationForSection(LU12I_W, Value.SectionID);
++ addRelocationForSection(ORI, Value.SectionID);
++ addRelocationForSection(LU32I_D, Value.SectionID);
++ addRelocationForSection(LU52I_D, Value.SectionID);
++ }
++ resolveRelocation(Section, Offset,
++ reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
++ Section.getStubOffset())),
++ RelType, 0);
++ Section.advanceStubOffset(getMaxStubSize());
++ }
++}
++
+ Expected<relocation_iterator>
+ RuntimeDyldELF::processRelocationRef(
+ unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
+@@ -1369,6 +1543,25 @@ RuntimeDyldELF::processRelocationRef(
+ }
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
+ }
++ } else if (Arch == Triple::loongarch64) {
++ if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) {
++ resolveLoongArch64Branch(SectionID, Value, RelI, Stubs);
++ } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 ||
++ RelType == ELF::R_LARCH_GOT_PC_LO12) {
++ // FIXME: This will create redundant got entry.
++ uint64_t GOTOffset = allocateGOTEntries(1);
++ // Create relocation for newly created GOT entry.
++ RelocationEntry RE = ++ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_LARCH_64); ++ if (Value.SymbolName) ++ addRelocationForSymbol(RE, Value.SymbolName); ++ else ++ addRelocationForSection(RE, Value.SectionID); ++ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ++ RelType); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2214,6 +2407,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +@@ -2525,6 +2719,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { + return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || + RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; + ++ if (Arch == Triple::loongarch64) ++ return RelTy == ELF::R_LARCH_GOT_PC_HI20 || ++ RelTy == ELF::R_LARCH_GOT_PC_LO12; ++ + if (Arch == Triple::x86_64) + return RelTy == ELF::R_X86_64_GOTPCREL || + RelTy == ELF::R_X86_64_GOTPCRELX || +diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index dfdd98cb3..2c930219c 100644 +--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ ++ bool resolveLoongArch64ShortBranch(unsigned SectionID, ++ relocation_iterator RelI, ++ const RelocationValueRef &Value); ++ ++ void resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, StubMap &Stubs); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + return 16; + else if (IsMipsN64ABI) + return 32; ++ if (Arch == Triple::loongarch64) ++ return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) + return 44; + else if (Arch == Triple::x86_64) +diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td +index 7e5c3563f..3e9e8b251 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch.td ++++ b/llvm/lib/Target/LoongArch/LoongArch.td +@@ -115,6 +115,11 @@ def HasLaLocalWithAbs + AssemblerPredicate<(all_of LaLocalWithAbs), + "Expand la.local as la.abs">; + ++// Unaligned memory access ++def FeatureUAL ++ : SubtargetFeature<"ual", "HasUAL", "true", ++ "Allow memory accesses to be unaligned">; ++ + //===----------------------------------------------------------------------===// + // Registers, instruction descriptions ... 
+ //===----------------------------------------------------------------------===// +@@ -128,13 +133,19 @@ include "LoongArchInstrInfo.td" + //===----------------------------------------------------------------------===// + + def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; +-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; ++def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; ++ ++// Generic 64-bit processor with double-precision floating-point support. ++def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, ++ FeatureUAL, ++ FeatureBasicD]>; + + // Support generic for compatibility with other targets. The triple will be used + // to change to the appropriate la32/la64 version. + def : ProcessorModel<"generic", NoSchedModel, []>; + + def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, ++ FeatureUAL, + FeatureExtLASX, + FeatureExtLVZ, + FeatureExtLBT]>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +index 6d9cb5e17..04fdd41d6 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +@@ -35,6 +35,12 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + if (emitPseudoExpansionLowering(*OutStreamer, MI)) + return; + ++ switch (MI->getOpcode()) { ++ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: ++ LowerPATCHABLE_FUNCTION_ENTER(*MI); ++ return; ++ } ++ + MCInst TmpInst; + if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) + EmitToStreamer(*OutStreamer, TmpInst); +@@ -110,6 +116,22 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + return false; + } + ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( ++ const MachineInstr &MI) { ++ const Function &F = MF->getFunction(); ++ if (F.hasFnAttribute("patchable-function-entry")) { ++ unsigned Num; ++ if (F.getFnAttribute("patchable-function-entry") ++ .getValueAsString() ++ .getAsInteger(10, Num)) ++ return; ++ emitNops(Num); ++ return; ++ } ++ ++ // TODO: Emit sled here once we get support for XRay. ++} ++ + bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AsmPrinter::runOnMachineFunction(MF); + return true; +diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +index 23e293547..c8bf657f8 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h ++++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +@@ -41,6 +41,8 @@ public: + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + ++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index a4a82bdef..19baa4b59 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -597,13 +597,12 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + } + } + +-// Helper function that emits error message for intrinsics with chain. ++// Helper function that emits error message for intrinsics with chain and return ++// merge values of a UNDEF and the chain. 
+ static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
+ StringRef ErrorMsg,
+ SelectionDAG &DAG) {
+-
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
+- ErrorMsg);
++ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
+ return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
+ SDLoc(Op));
+ }
+@@ -613,9 +612,11 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- SDValue Op0 = Op.getOperand(0);
+- std::string Name = Op->getOperationName(0);
+- const StringRef ErrorMsgOOR = "out of range";
++ EVT VT = Op.getValueType();
++ SDValue Chain = Op.getOperand(0);
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
+
+ switch (Op.getConstantOperandVal(1)) {
+ default:
+@@ -627,115 +628,76 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ case Intrinsic::loongarch_crcc_w_b_w:
+ case Intrinsic::loongarch_crcc_w_h_w:
+ case Intrinsic::loongarch_crcc_w_w_w:
+- case Intrinsic::loongarch_crcc_w_d_w: {
+- std::string Name = Op->getOperationName(0);
+- DAG.getContext()->emitError(Name + " requires target: loongarch64");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
++ case Intrinsic::loongarch_crcc_w_d_w:
++ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
+ case Intrinsic::loongarch_csrrd_w:
+ case Intrinsic::loongarch_csrrd_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_csrwr_w:
+ case Intrinsic::loongarch_csrwr_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2),
++ DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_csrxchg_w:
+ case Intrinsic::loongarch_csrxchg_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+- if (!isUInt<14>(Imm))
+- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
+- Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
+- Op0},
+- DL);
++ return !isUInt<14>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2), Op.getOperand(3),
++ DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ case Intrinsic::loongarch_iocsrrd_d: {
+- if (Subtarget.is64Bit())
+- return DAG.getMergeValues(
+- {DAG.getNode(
+- LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
+- Op0},
+- DL);
+- else {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.crc.w.d.w requires target: loongarch64");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
++ return DAG.getNode(
++ LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
+ }
+ #define IOCSRRD_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- return DAG.getMergeValues( \
+- {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \
+- Op0}, \
+- DL); \
++ return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
++ {Chain, Op.getOperand(2)}); \
+ }
+ IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
+ IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
+ IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
+ #undef IOCSRRD_CASE
+ case Intrinsic::loongarch_cpucfg: {
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
+- Op0},
+- DL);
++ return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
++ {Chain, Op.getOperand(2)});
+ }
+ case Intrinsic::loongarch_lddir_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<8>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
+- "' out of range");
+- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
+- }
+-
+- return Op;
++ return !isUInt<8>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : Op;
+ }
+ case Intrinsic::loongarch_movfcsr2gr: {
+- if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movfcsr2gr expects basic f target feature");
+- return DAG.getMergeValues(
+- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
+- }
++ if (!Subtarget.hasBasicF())
++ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+- if (!isUInt<2>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
+- "' " + ErrorMsgOOR);
+- return DAG.getMergeValues(
+- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
+- }
+- return DAG.getMergeValues(
+- {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
+- DAG.getConstant(Imm, DL, GRLenVT)),
+- Op.getOperand(0)},
+- DL);
++ return !isUInt<2>(Imm)
++ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ }
+ }
+ }
+
+ // Helper function that emits error message for intrinsics with void return
+-// value.
++// value and return the chain.
+ static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
+ SelectionDAG &DAG) {
+
+- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
+- ErrorMsg);
++ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
+ return Op.getOperand(0);
+ }
+
+@@ -743,10 +705,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- SDValue Op0 = Op.getOperand(0);
++ SDValue Chain = Op.getOperand(0);
+ uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
+ SDValue Op2 = Op.getOperand(2);
+- const StringRef ErrorMsgOOR = "out of range";
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqLA32 = "requires loongarch32";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
+
+ switch (IntrinsicEnum) {
+ default:
+@@ -754,122 +719,93 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ return SDValue();
+ case Intrinsic::loongarch_cacop_d:
+ case Intrinsic::loongarch_cacop_w: {
+- if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.cacop.d requires target: loongarch64");
+- return Op.getOperand(0);
+- }
+- if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.cacop.w requires target: loongarch32");
+- return Op.getOperand(0);
+- }
++ if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
++ if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
+ // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
+ unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<5>(Imm1))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+- SDValue Op4 = Op.getOperand(4);
+- int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
+- if (!isInt<12>(Imm2))
++ int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
++ if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
+ return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+ return Op;
+ }
+-
+ case Intrinsic::loongarch_dbar: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_ibar: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_break: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ case Intrinsic::loongarch_movgr2fcsr: {
+- if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movgr2fcsr expects basic f target feature");
+- return Op0;
+- }
++ if (!Subtarget.hasBasicF())
++ return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<2>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(
+- LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT),
+- DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
++ return !isUInt<2>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT),
++ DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
++ Op.getOperand(3)));
+ }
+ case Intrinsic::loongarch_syscall: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+- if (!isUInt<15>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+-
+- return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT));
++ return !isUInt<15>(Imm)
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
++ DAG.getConstant(Imm, DL, GRLenVT));
+ }
+ #define IOCSRWR_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+ SDValue Op3 = Op.getOperand(3); \
+- if (Subtarget.is64Bit()) \
+- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)); \
+- else \
+- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3); \
++ return Subtarget.is64Bit() \
++ ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
++ : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
++ Op3); \
+ }
+ IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
+ IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
+ IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
+ #undef IOCSRWR_CASE
+ case Intrinsic::loongarch_iocsrwr_d: {
+- if (Subtarget.is64Bit())
+- return DAG.getNode(
+- LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
+- else {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.iocsrwr.d requires target: loongarch64");
+- return Op.getOperand(0);
+- }
++ return !Subtarget.is64Bit()
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
++ : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
++ Op2,
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
++ Op.getOperand(3)));
+ }
+ #define ASRT_LE_GT_CASE(NAME) \
+ case Intrinsic::loongarch_##NAME: { \
+- if (!Subtarget.is64Bit()) { \
+- DAG.getContext()->emitError(Op->getOperationName(0) + \
+- " requires target: loongarch64"); \
+- return Op.getOperand(0); \
+- } \
+- return Op; \
++ return !Subtarget.is64Bit() \
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
++ : Op; \
+ }
+ ASRT_LE_GT_CASE(asrtle_d)
+ ASRT_LE_GT_CASE(asrtgt_d)
+ #undef ASRT_LE_GT_CASE
+ case Intrinsic::loongarch_ldpte_d: {
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+- if (!isUInt<8>(Imm))
+- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
+- if (!Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(Op->getOperationName(0) +
+- " requires target: loongarch64");
+- return Op.getOperand(0);
+- }
+- return Op;
++ return !Subtarget.is64Bit()
++ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
++ : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
++ : Op;
+ }
+ }
+ }
+@@ -1022,6 +958,16 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
+ }
+
++// Helper function that emits error message for intrinsics with chain and return
++// a UNDEF and the chain as the results.
++static void emitErrorAndReplaceIntrinsicWithChainResults(
++ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
++ StringRef ErrorMsg) {
++ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
++ Results.push_back(DAG.getUNDEF(N->getValueType(0)));
++ Results.push_back(N->getOperand(0));
++}
++
+ void LoongArchTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+@@ -1142,50 +1088,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+- SDValue Op0 = N->getOperand(0);
+- EVT VT = N->getValueType(0);
+- uint64_t Op1 = N->getConstantOperandVal(1);
++ SDValue Chain = N->getOperand(0);
++ SDValue Op2 = N->getOperand(2);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+- if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
++ const StringRef ErrorMsgOOR = "argument out of range";
++ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
++ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
++
++ switch (N->getConstantOperandVal(1)) {
++ default:
++ llvm_unreachable("Unexpected Intrinsic.");
++ case Intrinsic::loongarch_movfcsr2gr: {
+ if (!Subtarget.hasBasicF()) {
+- DAG.getContext()->emitError(
+- "llvm.loongarch.movfcsr2gr expects basic f target feature");
+- Results.push_back(DAG.getMergeValues(
+- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
+- Results.push_back(N->getOperand(0));
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgReqF);
+ return;
+ }
+- unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
++ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (!isUInt<2>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
+- "' " + "out of range");
+- Results.push_back(DAG.getMergeValues(
+- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
+- Results.push_back(N->getOperand(0));
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
+ return;
+ }
++ SDValue MOVFCSR2GRResults = DAG.getNode(
++ LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+- DAG.getNode(ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
+- return;
++ DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
++ Results.push_back(MOVFCSR2GRResults.getValue(1));
++ break;
+ }
+- SDValue Op2 = N->getOperand(2);
+- std::string Name = N->getOperationName(0);
+-
+- switch (Op1) {
+- default:
+- llvm_unreachable("Unexpected Intrinsic.");
+ #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back(DAG.getNode( \
+- ISD::TRUNCATE, DL, VT, \
+- DAG.getNode( \
+- LoongArchISD::NODE, DL, MVT::i64, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue NODE = DAG.getNode( \
++ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
++ Results.push_back(NODE.getValue(1)); \
+ break; \
+ }
+ CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
+@@ -1198,12 +1138,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+
+ #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back( \
+- DAG.getNode(ISD::TRUNCATE, DL, VT, \
+- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \
+- N->getOperand(3))))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue NODE = DAG.getNode( \
++ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, Op2, \
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
++ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
++ Results.push_back(NODE.getValue(1)); \
+ break; \
+ }
+ CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
+@@ -1211,11 +1151,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ #undef CRC_CASE_EXT_UNARYOP
+ #define CSR_CASE(ID) \
+ case Intrinsic::loongarch_##ID: { \
+- if (!Subtarget.is64Bit()) { \
+- DAG.getContext()->emitError(Name + " requires target: loongarch64"); \
+- Results.push_back(DAG.getUNDEF(VT)); \
+- Results.push_back(N->getOperand(0)); \
+- } \
++ if (!Subtarget.is64Bit()) \
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \
++ ErrorMsgReqLA64); \
+ break; \
+ }
+ CSR_CASE(csrrd_d);
+@@ -1226,62 +1164,59 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ case Intrinsic::loongarch_csrrd_w: {
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
++ SDValue CSRRDResults =
++ DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+- DAG.getNode(ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
++ Results.push_back(CSRRDResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_csrwr_w: {
+ unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ SDValue CSRWRResults =
++ DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
++ DAG.getConstant(Imm, DL, GRLenVT)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
++ Results.push_back(CSRWRResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_csrxchg_w: {
+ unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ if (!isUInt<14>(Imm)) {
+- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(N->getOperand(0));
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgOOR);
++ return;
+ }
+-
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(
+- LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
+- DAG.getConstant(Imm, DL, GRLenVT))));
+- Results.push_back(N->getOperand(0));
++ SDValue CSRXCHGResults = DAG.getNode(
++ LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
++ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
++ DAG.getConstant(Imm, DL, GRLenVT)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
++ Results.push_back(CSRXCHGResults.getValue(1));
+ break;
+ }
+ #define IOCSRRD_CASE(NAME, NODE) \
+ case Intrinsic::loongarch_##NAME: { \
+- Results.push_back(DAG.getNode( \
+- ISD::TRUNCATE, DL, N->getValueType(0), \
+- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \
+- Results.push_back(N->getOperand(0)); \
++ SDValue IOCSRRDResults = \
++ DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
++ Results.push_back( \
++ DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
++ Results.push_back(IOCSRRDResults.getValue(1)); \
+ break; \
+ }
+ IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
+@@ -1289,20 +1224,19 @@ void LoongArchTargetLowering::ReplaceNodeResults(
+ IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
+ #undef IOCSRRD_CASE
+ case Intrinsic::loongarch_cpucfg: {
+- Results.push_back(DAG.getNode(
+- ISD::TRUNCATE, DL, VT,
+- DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
+- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
+- Results.push_back(Op0);
++ SDValue CPUCFGResults =
++ DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
++ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
++ Results.push_back(
++ DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
++ Results.push_back(CPUCFGResults.getValue(1));
+ break;
+ }
+ case Intrinsic::loongarch_lddir_d: {
+ if (!Subtarget.is64Bit()) {
+- DAG.getContext()->emitError(N->getOperationName(0) +
+- " requires target: loongarch64");
+- Results.push_back(DAG.getUNDEF(VT));
+- Results.push_back(Op0);
+- break;
++ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
++ ErrorMsgReqLA64);
++ return;
+ }
+ break;
+ }
+@@ -1764,6 +1698,18 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
+ }
+ }
+
++bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
++ EVT VT, unsigned AddrSpace, Align 
Alignment, MachineMemOperand::Flags Flags, ++ unsigned *Fast) const { ++ if (!Subtarget.hasUAL()) ++ return false; ++ ++ // TODO: set reasonable speed number. ++ if (Fast) ++ *Fast = 1; ++ return true; ++} ++ + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((LoongArchISD::NodeType)Opcode) { + case LoongArchISD::FIRST_NUMBER: +@@ -1907,7 +1853,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + default: + llvm_unreachable("Unexpected ABI"); + case LoongArchABI::ABI_ILP32S: +- case LoongArchABI::ABI_LP64S: + case LoongArchABI::ABI_ILP32F: + case LoongArchABI::ABI_LP64F: + report_fatal_error("Unimplemented ABI"); +@@ -1916,6 +1861,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, + case LoongArchABI::ABI_LP64D: + UseGPRForFloat = !IsFixed; + break; ++ case LoongArchABI::ABI_LP64S: ++ break; + } + + // FPR32 and FPR64 alias each other. +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 0ddcda66d..62c83384c 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -80,7 +80,22 @@ enum NodeType : unsigned { + CRCC_W_D_W, + + CSRRD, ++ ++ // Write new value to CSR and return old value. ++ // Operand 0: A chain pointer. ++ // Operand 1: The new value to write. ++ // Operand 2: The address of the required CSR. ++ // Result 0: The old value of the CSR. ++ // Result 1: The new chain pointer. + CSRWR, ++ ++ // Similar to CSRWR but with a write mask. ++ // Operand 0: A chain pointer. ++ // Operand 1: The new value to write. ++ // Operand 2: The write mask. ++ // Operand 3: The address of the required CSR. ++ // Result 0: The old value of the CSR. ++ // Result 1: The new chain pointer. + CSRXCHG, + + // IOCSR access operations +@@ -181,6 +196,11 @@ public: + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + ++ bool allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), ++ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, ++ unsigned *Fast = nullptr) const override; ++ + private: + /// Target-specific function used to lower LoongArch calling conventions. 
+ typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +index fbbb764b8..3e19f3e2f 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -17,6 +17,7 @@ + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "MCTargetDesc/LoongArchMatInt.h" + #include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/MC/MCInstBuilder.h" + + using namespace llvm; + +@@ -28,6 +29,13 @@ LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) + LoongArch::ADJCALLSTACKUP), + STI(STI) {} + ++MCInst LoongArchInstrInfo::getNop() const { ++ return MCInstBuilder(LoongArch::ANDI) ++ .addReg(LoongArch::R0) ++ .addReg(LoongArch::R0) ++ .addImm(0); ++} ++ + void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +index e2b80460f..cf83abf27 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +@@ -27,6 +27,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo { + public: + explicit LoongArchInstrInfo(LoongArchSubtarget &STI); + ++ MCInst getNop() const override; ++ + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, + bool KillSrc) const override; +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 75b2adc72..f20beee92 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -75,21 +75,21 @@ def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; + def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; + def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>; + def loongarch_crc_w_b_w +- : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_h_w +- : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_w_w +- : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crc_w_d_w +- : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_b_w +- : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_h_w +- : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_w_w +- : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_crcc_w_d_w +- : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW>; ++ : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; + def loongarch_bstrins + : SDNode<"LoongArchISD::BSTRINS", 
SDT_LoongArchBStrIns>; + def loongarch_bstrpick +@@ -106,7 +106,8 @@ def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI, + [SDNPHasChain, SDNPSideEffect]>; +-def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr>; ++def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr, ++ [SDNPHasChain]>; + def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI, +@@ -139,7 +140,7 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", + SDT_LoongArchIocsrwr, + [SDNPHasChain, SDNPSideEffect]>; + def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, +- [SDNPHasChain, SDNPSideEffect]>; ++ [SDNPHasChain]>; + + //===----------------------------------------------------------------------===// + // Operand and SDNode transformation definitions. +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +index d8850f656..a0136440e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +@@ -12,6 +12,7 @@ + + #include "LoongArchSubtarget.h" + #include "LoongArchFrameLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" + + using namespace llvm; + +@@ -48,8 +49,8 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( + if (!Is64Bit && HasLA64) + report_fatal_error("Feature 64bit should be used for loongarch64 target."); + +- // TODO: ILP32{S,F} LP64{S,F} +- TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; ++ TargetABI = LoongArchABI::computeTargetABI(TT, ABIName); ++ + return *this; + } + +diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +index aa87638e4..4ff42e3b1 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h ++++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -42,6 +42,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { + bool HasLaGlobalWithPcrel = false; + bool HasLaGlobalWithAbs = false; + bool HasLaLocalWithAbs = false; ++ bool HasUAL = false; + unsigned GRLen = 32; + MVT GRLenVT = MVT::i32; + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; +@@ -91,6 +92,7 @@ public: + bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; } + bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } + bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } ++ bool hasUAL() const { return HasUAL; } + MVT getGRLenVT() const { return GRLenVT; } + unsigned getGRLen() const { return GRLen; } + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index ff0804e2a..ecb68ff40 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -202,5 +202,5 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); +- return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); ++ return new LoongArchAsmBackend(STI, OSABI, 
TT.isArch64Bit(), Options);
+ }
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+index 0d04cecc4..ae9bb8af0 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+@@ -26,11 +26,13 @@ class LoongArchAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo &STI;
+ uint8_t OSABI;
+ bool Is64Bit;
++ const MCTargetOptions &TargetOptions;
+
+ public:
+- LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
+- Is64Bit(Is64Bit) {}
++ LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
++ const MCTargetOptions &Options)
++ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
++ TargetOptions(Options) {}
+ ~LoongArchAsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+@@ -63,6 +65,7 @@ public:
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
++ const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
+ };
+ } // end namespace llvm
+
+diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+index de2ba2833..28404f04d 100644
+--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+@@ -15,11 +15,71 @@
+ #include "llvm/ADT/ArrayRef.h"
+ #include "llvm/ADT/Triple.h"
+ #include "llvm/MC/MCSubtargetInfo.h"
++#include "llvm/Support/raw_ostream.h"
+
+ namespace llvm {
+
+ namespace LoongArchABI {
+
++ABI computeTargetABI(const Triple &TT, StringRef ABIName) {
++ ABI ArgProvidedABI = getTargetABI(ABIName);
++ bool Is64Bit = TT.isArch64Bit();
++ ABI TripleABI;
++
++ // Figure out the ABI explicitly requested via the triple's environment type.
++ switch (TT.getEnvironment()) {
++ case llvm::Triple::EnvironmentType::GNUSF:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64S : LoongArchABI::ABI_ILP32S;
++ break;
++ case llvm::Triple::EnvironmentType::GNUF32:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64F : LoongArchABI::ABI_ILP32F;
++ break;
++
++ // Let the fallback case behave like {ILP32,LP64}D.
++ case llvm::Triple::EnvironmentType::GNUF64:
++ default:
++ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
++ break;
++ }
++
++ switch (ArgProvidedABI) {
++ case LoongArchABI::ABI_Unknown:
++ // Fallback to the triple-implied ABI if ABI name is not specified or
++ // invalid.
++ if (!ABIName.empty()) ++ errs() << "'" << ABIName ++ << "' is not a recognized ABI for this target, ignoring and using " ++ "triple-implied ABI\n"; ++ return TripleABI; ++ ++ case LoongArchABI::ABI_ILP32S: ++ case LoongArchABI::ABI_ILP32F: ++ case LoongArchABI::ABI_ILP32D: ++ if (Is64Bit) { ++ errs() << "32-bit ABIs are not supported for 64-bit targets, ignoring " ++ "target-abi and using triple-implied ABI\n"; ++ return TripleABI; ++ } ++ break; ++ ++ case LoongArchABI::ABI_LP64S: ++ case LoongArchABI::ABI_LP64F: ++ case LoongArchABI::ABI_LP64D: ++ if (!Is64Bit) { ++ errs() << "64-bit ABIs are not supported for 32-bit targets, ignoring " ++ "target-abi and using triple-implied ABI\n"; ++ return TripleABI; ++ } ++ break; ++ } ++ ++ if (!ABIName.empty() && TT.hasEnvironment() && ArgProvidedABI != TripleABI) ++ errs() << "warning: triple-implied ABI conflicts with provided target-abi '" ++ << ABIName << "', using target-abi\n"; ++ ++ return ArgProvidedABI; ++} ++ + ABI getTargetABI(StringRef ABIName) { + auto TargetABI = StringSwitch(ABIName) + .Case("ilp32s", ABI_ILP32S) +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +index c5f072677..cdbd1f569 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +@@ -54,6 +54,7 @@ enum ABI { + ABI_Unknown + }; + ++ABI computeTargetABI(const Triple &TT, StringRef ABIName); + ABI getTargetABI(StringRef ABIName); + + // Returns the register used to hold the stack pointer after realignment. +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index 57330dd31..a6b9c0652 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_4: + return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_Data_8: +- return ELF::R_LARCH_64; ++ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +index 3410c8f42..a6e15e094 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +@@ -12,6 +12,7 @@ + + #include "LoongArchELFStreamer.h" + #include "LoongArchAsmBackend.h" ++#include "LoongArchBaseInfo.h" + #include "llvm/BinaryFormat/ELF.h" + #include "llvm/MC/MCAssembler.h" + #include "llvm/MC/MCCodeEmitter.h" +@@ -23,9 +24,10 @@ using namespace llvm; + LoongArchTargetELFStreamer::LoongArchTargetELFStreamer( + MCStreamer &S, const MCSubtargetInfo &STI) + : LoongArchTargetStreamer(S) { +- // FIXME: select appropriate ABI. +- setTargetABI(STI.getTargetTriple().isArch64Bit() ? 
LoongArchABI::ABI_LP64D +- : LoongArchABI::ABI_ILP32D); ++ auto &MAB = static_cast( ++ getStreamer().getAssembler().getBackend()); ++ setTargetABI(LoongArchABI::computeTargetABI( ++ STI.getTargetTriple(), MAB.getTargetOptions().getABIName())); + } + + MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index 80ebe0fa5..f6ea8d290 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -1448,6 +1448,20 @@ StringRef sys::getHostCPUName() { + return "generic"; + } + } ++#elif defined(__loongarch__) ++StringRef sys::getHostCPUName() { ++ // Use processor id to detect cpu name. ++ uint32_t processor_id; ++ __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); ++ switch (processor_id & 0xff00) { ++ case 0xc000: // Loongson 64bit, 4-issue ++ return "la464"; ++ // TODO: Others. ++ default: ++ break; ++ } ++ return "generic"; ++} + #elif defined(__riscv) + StringRef sys::getHostCPUName() { + #if defined(__linux__) +@@ -1842,6 +1856,23 @@ bool sys::getHostCPUFeatures(StringMap &Features) { + + return true; + } ++#elif defined(__linux__) && defined(__loongarch__) ++#include ++bool sys::getHostCPUFeatures(StringMap &Features) { ++ unsigned long hwcap = getauxval(AT_HWCAP); ++ bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU ++ uint32_t cpucfg2 = 0x2; ++ __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); ++ ++ Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP ++ Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP ++ ++ Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX ++ Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX ++ Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ ++ ++ return true; ++} + #else + bool sys::getHostCPUFeatures(StringMap &Features) { return false; } + #endif +diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +index faa8c314f..772d24c5c 100644 +--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp ++++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp +@@ -1,4 +1,4 @@ +-//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=// ++//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. 
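Stepping back from the renames, the Host.cpp hunks above reduce host detection to two probes: the `cpucfg` instruction (word 0 for the processor ID, word 2 for FP features) and `getauxval(AT_HWCAP)` for kernel-reported extensions. A minimal standalone sketch of the same scheme, assuming a LoongArch Linux host (it does not build elsewhere) and reusing the magic numbers from the hunks above:

#include <sys/auxv.h>
#include <cstdint>
#include <cstdio>

static const char *detectHostCPU() {
  uint32_t Prid;
  // CPUCFG word 0 holds the processor ID; 0xc000 identifies the LA464 core.
  __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(Prid));
  return (Prid & 0xff00) == 0xc000 ? "la464" : "generic";
}

static bool hostHasLSX() {
  // Bit 4 of AT_HWCAP is HWCAP_LOONGARCH_LSX, as in getHostCPUFeatures above.
  return (getauxval(AT_HWCAP) & (1UL << 4)) != 0;
}

int main() {
  std::printf("cpu=%s lsx=%d\n", detectHostCPU(), hostHasLSX());
}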
+@@ -27,12 +27,11 @@ const ArchInfo AllArchs[] = { + #include "llvm/TargetParser/LoongArchTargetParser.def" + }; + +-LoongArch::ArchKind LoongArch::parseArch(StringRef Arch) { ++bool LoongArch::isValidArchName(StringRef Arch) { + for (const auto A : AllArchs) + if (A.Name == Arch) +- return A.Kind; +- +- return LoongArch::ArchKind::AK_INVALID; ++ return true; ++ return false; + } + + bool LoongArch::getArchFeatures(StringRef Arch, +@@ -40,10 +39,22 @@ bool LoongArch::getArchFeatures(StringRef Arch, + for (const auto A : AllArchs) { + if (A.Name == Arch) { + for (const auto F : AllFeatures) +- if ((A.Features & F.Kind) == F.Kind && F.Kind != FK_INVALID) ++ if ((A.Features & F.Kind) == F.Kind) + Features.push_back(F.Name); + return true; + } + } + return false; + } ++ ++bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } ++ ++void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { ++ for (const auto A : AllArchs) ++ Values.emplace_back(A.Name); ++} ++ ++StringRef LoongArch::getDefaultArch(bool Is64Bit) { ++ // TODO: use a real 32-bit arch name. ++ return Is64Bit ? "loongarch64" : ""; ++} +diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 599eeeabc..367a2bef2 100644 +--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -492,7 +492,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsMIPS64 = TargetTriple.isMIPS64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; +- bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64; ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; + bool IsWindows = TargetTriple.isOSWindows(); + bool IsFuchsia = TargetTriple.isOSFuchsia(); +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index fe8b8ce0d..603fa97e1 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -434,6 +434,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { + 0x0200000000000, // OriginBase + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x500000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x100000000000, // OriginBase ++}; ++ + // aarch64 FreeBSD + static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask +@@ -491,6 +499,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { + &Linux_AArch64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = { ++ nullptr, ++ &Linux_LoongArch64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +@@ -537,6 +550,7 @@ private: + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; + friend struct VarArgSystemZHelper; ++ friend struct VarArgLoongArch64Helper; + + void initializeModule(Module &M); + void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI); +@@ -986,6 +1000,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::aarch64_be: + MapParams = 
Linux_ARM_MemoryMapParams.bits64; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LoongArch_MemoryMapParams.bits64; ++ break; + default: + report_fatal_error("unsupported architecture"); + } +@@ -5709,6 +5726,123 @@ struct VarArgSystemZHelper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ AllocaInst *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) ++ : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (Value *A : ++ llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) { ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. ++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = ++ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. 
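++      // The backup is CopySize bytes, i.e. the total va_arg shadow size
++      // that visitCallBase stored through VAArgOverflowSizeTLS; the memcpy
++      // below clamps its source length to kParamTLSSize with umin so it
++      // never reads past the end of __msan_va_arg_tls.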
++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ VAArgTLSCopy->setAlignment(kShadowTLSAlignment); ++ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()), ++ CopySize, kShadowTLSAlignment, false); ++ ++ Value *SrcSize = IRB.CreateBinaryIntrinsic( ++ Intrinsic::umin, CopySize, ++ ConstantInt::get(MS.IntptrTy, kParamTLSSize)); ++ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS, ++ kShadowTLSAlignment, SrcSize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ NextNodeIRBuilder IRB(OrigInst); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// A no-op implementation of VarArgHelper. + struct VarArgNoOpHelper : public VarArgHelper { + VarArgNoOpHelper(Function &F, MemorySanitizer &MS, +@@ -5741,6 +5875,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +new file mode 100644 +index 000000000..08fff9f8c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +@@ -0,0 +1,403 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s | FileCheck %s ++ ++;; This file contains tests that should have identical output for all ABIs, i.e. ++;; where no arguments are passed via floating point registers. ++ ++;; Check that on LA64, i128 is passed in a pair of GPRs. 
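Before the first test, a C-level cross-check of what it encodes may help: on the LP64 ABIs an i128 simply occupies two consecutive GPRs, with no even-register alignment requirement, so with $a0 taken by the i64 the i128 lands in $a1/$a2. A sketch, assuming a loongarch64 compiler with __int128 support (the function name is illustrative):

#include <cstdint>

// Mirrors @callee_i128_in_regs below: a arrives in $a0, b in $a1/$a2,
// and truncating b keeps only the $a1 half before the add.
uint64_t i128_in_regs(uint64_t a, __int128 b) {
  return a + static_cast<uint64_t>(b);
}

The IR test that follows checks the same lowering at the llc level.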
++define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { ++; CHECK-LABEL: callee_i128_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %b_trunc = trunc i128 %b to i64 ++ %1 = add i64 %a, %b_trunc ++ ret i64 %1 ++} ++ ++define i64 @caller_i128_in_regs() nounwind { ++; CHECK-LABEL: caller_i128_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: move $a2, $zero ++; CHECK-NEXT: bl %plt(callee_i128_in_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) ++ ret i64 %1 ++} ++ ++;; Check that the stack is used once the GPRs are exhausted. ++define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { ++; CHECK-LABEL: callee_many_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $t0, $sp, 0 ++; CHECK-NEXT: xor $a5, $a5, $t0 ++; CHECK-NEXT: xor $a4, $a4, $a7 ++; CHECK-NEXT: or $a4, $a4, $a5 ++; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 ++; CHECK-NEXT: andi $a0, $a0, 255 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: add.d $a0, $a0, $a3 ++; CHECK-NEXT: sltui $a1, $a4, 1 ++; CHECK-NEXT: add.d $a0, $a1, $a0 ++; CHECK-NEXT: add.d $a0, $a0, $a6 ++; CHECK-NEXT: ld.d $a1, $sp, 8 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %a_ext = zext i8 %a to i64 ++ %b_ext = zext i16 %b to i64 ++ %c_ext = zext i32 %c to i64 ++ %1 = add i64 %a_ext, %b_ext ++ %2 = add i64 %1, %c_ext ++ %3 = add i64 %2, %d ++ %4 = icmp eq i128 %e, %g ++ %5 = zext i1 %4 to i64 ++ %6 = add i64 %5, %3 ++ %7 = add i64 %6, %f ++ %8 = add i64 %7, %h ++ ret i64 %8 ++} ++ ++define i64 @caller_many_scalars() nounwind { ++; CHECK-LABEL: caller_many_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -32 ++; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 8 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: st.d $zero, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: ori $a2, $zero, 3 ++; CHECK-NEXT: ori $a3, $zero, 4 ++; CHECK-NEXT: ori $a4, $zero, 5 ++; CHECK-NEXT: ori $a6, $zero, 6 ++; CHECK-NEXT: ori $a7, $zero, 7 ++; CHECK-NEXT: move $a5, $zero ++; CHECK-NEXT: bl %plt(callee_many_scalars) ++; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 32 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) ++ ret i64 %1 ++} ++ ++;; Check that i256 is passed indirectly. 
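Unlike i128, an i256 has no register form: the caller spills the value into its own frame and passes only its address, which is why the CHECK lines below show stores relative to $sp followed by addi.d instructions computing $a0/$a1. Conceptually the call lowers to the following hand-written equivalent, where I256 is an illustrative stand-in for the i256:

#include <cstdint>

struct I256 { uint64_t w[4]; }; // stand-in for the test's i256

// What @callee_large_scalars effectively receives: two addresses.
static bool equalIndirect(const I256 *a, const I256 *b) {
  uint64_t diff = 0;
  for (int i = 0; i < 4; ++i)
    diff |= a->w[i] ^ b->w[i]; // same xor/or reduction as the CHECK lines
  return diff == 0;
}

int caller() {
  I256 one = {{1, 0, 0, 0}}, two = {{2, 0, 0, 0}}; // spilled to the frame
  return equalIndirect(&one, &two);                // addresses in $a0/$a1
}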
++ ++define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ++; CHECK-LABEL: callee_large_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a2, $a1, 24 ++; CHECK-NEXT: ld.d $a3, $a0, 24 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a3, $a1, 8 ++; CHECK-NEXT: ld.d $a4, $a0, 8 ++; CHECK-NEXT: xor $a3, $a4, $a3 ++; CHECK-NEXT: or $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a3, $a1, 16 ++; CHECK-NEXT: ld.d $a4, $a0, 16 ++; CHECK-NEXT: xor $a3, $a4, $a3 ++; CHECK-NEXT: ld.d $a1, $a1, 0 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: xor $a0, $a0, $a1 ++; CHECK-NEXT: or $a0, $a0, $a3 ++; CHECK-NEXT: or $a0, $a0, $a2 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ret ++ %1 = icmp eq i256 %a, %b ++ %2 = zext i1 %1 to i64 ++ ret i64 %2 ++} ++ ++define i64 @caller_large_scalars() nounwind { ++; CHECK-LABEL: caller_large_scalars: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: st.d $zero, $sp, 24 ++; CHECK-NEXT: st.d $zero, $sp, 16 ++; CHECK-NEXT: st.d $zero, $sp, 8 ++; CHECK-NEXT: st.d $zero, $sp, 56 ++; CHECK-NEXT: st.d $zero, $sp, 48 ++; CHECK-NEXT: st.d $zero, $sp, 40 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: st.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a1, $sp, 0 ++; CHECK-NEXT: bl %plt(callee_large_scalars) ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_large_scalars(i256 1, i256 2) ++ ret i64 %1 ++} ++ ++;; Check that arguments larger than 2*GRLen are handled correctly when their ++;; address is passed on the stack rather than in memory. ++ ++;; Must keep define on a single line due to an update_llc_test_checks.py limitation ++define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { ++; CHECK-LABEL: callee_large_scalars_exhausted_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a0, $sp, 8 ++; CHECK-NEXT: ld.d $a1, $a0, 24 ++; CHECK-NEXT: ld.d $a2, $a7, 24 ++; CHECK-NEXT: xor $a1, $a2, $a1 ++; CHECK-NEXT: ld.d $a2, $a0, 8 ++; CHECK-NEXT: ld.d $a3, $a7, 8 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: or $a1, $a2, $a1 ++; CHECK-NEXT: ld.d $a2, $a0, 16 ++; CHECK-NEXT: ld.d $a3, $a7, 16 ++; CHECK-NEXT: xor $a2, $a3, $a2 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: ld.d $a3, $a7, 0 ++; CHECK-NEXT: xor $a0, $a3, $a0 ++; CHECK-NEXT: or $a0, $a0, $a2 ++; CHECK-NEXT: or $a0, $a0, $a1 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ret ++ %1 = icmp eq i256 %h, %j ++ %2 = zext i1 %1 to i64 ++ ret i64 %2 ++} ++ ++define i64 @caller_large_scalars_exhausted_regs() nounwind { ++; CHECK-LABEL: caller_large_scalars_exhausted_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -96 ++; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 16 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: ori $a0, $zero, 9 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 10 ++; CHECK-NEXT: st.d $a0, $sp, 16 ++; CHECK-NEXT: st.d $zero, $sp, 40 ++; CHECK-NEXT: st.d $zero, $sp, 32 ++; CHECK-NEXT: st.d $zero, $sp, 24 ++; CHECK-NEXT: st.d $zero, $sp, 72 ++; CHECK-NEXT: st.d $zero, $sp, 64 ++; CHECK-NEXT: st.d $zero, $sp, 56 ++; CHECK-NEXT: ori $a0, $zero, 8 ++; CHECK-NEXT: st.d $a0, $sp, 48 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: ori $a2, $zero, 3 ++; 
CHECK-NEXT: ori $a3, $zero, 4 ++; CHECK-NEXT: ori $a4, $zero, 5 ++; CHECK-NEXT: ori $a5, $zero, 6 ++; CHECK-NEXT: ori $a6, $zero, 7 ++; CHECK-NEXT: addi.d $a7, $sp, 48 ++; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 96 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_large_scalars_exhausted_regs( ++ i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, ++ i256 10) ++ ret i64 %1 ++} ++ ++;; Check large struct arguments, which are passed byval ++ ++%struct.large = type { i64, i64, i64, i64 } ++ ++define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { ++; CHECK-LABEL: callee_large_struct: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $a1, $a0, 24 ++; CHECK-NEXT: ld.d $a0, $a0, 0 ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ret ++ %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 ++ %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 ++ %3 = load i64, ptr %1 ++ %4 = load i64, ptr %2 ++ %5 = add i64 %3, %4 ++ ret i64 %5 ++} ++ ++define i64 @caller_large_struct() nounwind { ++; CHECK-LABEL: caller_large_struct: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: st.d $a0, $sp, 40 ++; CHECK-NEXT: st.d $a0, $sp, 8 ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: st.d $a0, $sp, 48 ++; CHECK-NEXT: st.d $a0, $sp, 16 ++; CHECK-NEXT: ori $a0, $zero, 3 ++; CHECK-NEXT: st.d $a0, $sp, 56 ++; CHECK-NEXT: st.d $a0, $sp, 24 ++; CHECK-NEXT: ori $a0, $zero, 4 ++; CHECK-NEXT: st.d $a0, $sp, 64 ++; CHECK-NEXT: st.d $a0, $sp, 32 ++; CHECK-NEXT: addi.d $a0, $sp, 8 ++; CHECK-NEXT: bl %plt(callee_large_struct) ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: ret ++ %ls = alloca %struct.large, align 8 ++ %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 ++ store i64 1, ptr %a ++ %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 ++ store i64 2, ptr %b ++ %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 ++ store i64 3, ptr %c ++ %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 ++ store i64 4, ptr %d ++ %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) ++ ret i64 %1 ++} ++ ++;; Check return scalar which size is 2*GRLen. ++ ++define i128 @callee_small_scalar_ret() nounwind { ++; CHECK-LABEL: callee_small_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $a0, $zero, -1 ++; CHECK-NEXT: move $a1, $a0 ++; CHECK-NEXT: ret ++ ret i128 -1 ++} ++ ++define i64 @caller_small_scalar_ret() nounwind { ++; CHECK-LABEL: caller_small_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_small_scalar_ret) ++; CHECK-NEXT: addi.w $a2, $zero, -2 ++; CHECK-NEXT: xor $a0, $a0, $a2 ++; CHECK-NEXT: orn $a0, $a0, $a1 ++; CHECK-NEXT: sltui $a0, $a0, 1 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i128 @callee_small_scalar_ret() ++ %2 = icmp eq i128 -2, %1 ++ %3 = zext i1 %2 to i64 ++ ret i64 %3 ++} ++ ++;; Check return struct which size is 2*GRLen. 
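An aggregate of exactly 2*GRLen comes back in $a0/$a1 with no memory round trip, as the pair of tests below demonstrates. The same contract in C++ terms, as a sketch (Small mirrors %struct.small; the helper names are made up):

#include <cstdint>

struct Small { uint64_t first; void *second; }; // mirrors %struct.small

// Returned in $a0 (first) and $a1 (second); no hidden sret pointer.
Small makeSmall() { return Small{1, nullptr}; }

uint64_t useSmall() {
  Small s = makeSmall(); // reads the pair straight out of $a0/$a1
  return s.first + reinterpret_cast<uintptr_t>(s.second);
}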
++ ++%struct.small = type { i64, ptr } ++ ++define %struct.small @callee_small_struct_ret() nounwind { ++; CHECK-LABEL: callee_small_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: move $a1, $zero ++; CHECK-NEXT: ret ++ ret %struct.small { i64 1, ptr null } ++} ++ ++define i64 @caller_small_struct_ret() nounwind { ++; CHECK-LABEL: caller_small_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_small_struct_ret) ++; CHECK-NEXT: add.d $a0, $a0, $a1 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call %struct.small @callee_small_struct_ret() ++ %2 = extractvalue %struct.small %1, 0 ++ %3 = extractvalue %struct.small %1, 1 ++ %4 = ptrtoint ptr %3 to i64 ++ %5 = add i64 %2, %4 ++ ret i64 %5 ++} ++ ++;; Check return scalar which size is more than 2*GRLen. ++ ++define i256 @callee_large_scalar_ret() nounwind { ++; CHECK-LABEL: callee_large_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $a1, $zero, -1 ++; CHECK-NEXT: st.d $a1, $a0, 24 ++; CHECK-NEXT: st.d $a1, $a0, 16 ++; CHECK-NEXT: st.d $a1, $a0, 8 ++; CHECK-NEXT: lu12i.w $a1, -30141 ++; CHECK-NEXT: ori $a1, $a1, 747 ++; CHECK-NEXT: st.d $a1, $a0, 0 ++; CHECK-NEXT: ret ++ ret i256 -123456789 ++} ++ ++define void @caller_large_scalar_ret() nounwind { ++; CHECK-LABEL: caller_large_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -48 ++; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 0 ++; CHECK-NEXT: bl %plt(callee_large_scalar_ret) ++; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 48 ++; CHECK-NEXT: ret ++ %1 = call i256 @callee_large_scalar_ret() ++ ret void ++} ++ ++;; Check return struct which size is more than 2*GRLen. 
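Once the return value outgrows 2*GRLen the roles flip: the caller reserves the storage in its own frame and passes the address as a hidden first argument in $a0, which is exactly what the sret attribute below encodes. A C++ rendition of the shape, again only a sketch with illustrative names:

#include <cstdint>

struct Large { uint64_t a, b, c, d; }; // mirrors %struct.large

// Compiled for an LP64 ABI this returns through a hidden pointer in $a0,
// like @callee_large_struct_ret below.
Large makeLarge() { return Large{1, 2, 3, 4}; }

uint64_t useLarge() {
  Large l = makeLarge(); // the caller's frame provides the sret buffer
  return l.a + l.d;
}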
++ ++define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { ++; CHECK-LABEL: callee_large_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $a1, $zero, 4 ++; CHECK-NEXT: st.d $a1, $a0, 24 ++; CHECK-NEXT: ori $a1, $zero, 3 ++; CHECK-NEXT: st.d $a1, $a0, 16 ++; CHECK-NEXT: ori $a1, $zero, 2 ++; CHECK-NEXT: st.d $a1, $a0, 8 ++; CHECK-NEXT: ori $a1, $zero, 1 ++; CHECK-NEXT: st.d $a1, $a0, 0 ++; CHECK-NEXT: ret ++ %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 ++ store i64 1, ptr %a, align 4 ++ %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 ++ store i64 2, ptr %b, align 4 ++ %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 ++ store i64 3, ptr %c, align 4 ++ %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 ++ store i64 4, ptr %d, align 4 ++ ret void ++} ++ ++define i64 @caller_large_struct_ret() nounwind { ++; CHECK-LABEL: caller_large_struct_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -48 ++; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: addi.d $a0, $sp, 8 ++; CHECK-NEXT: bl %plt(callee_large_struct_ret) ++; CHECK-NEXT: ld.d $a0, $sp, 32 ++; CHECK-NEXT: ld.d $a1, $sp, 8 ++; CHECK-NEXT: add.d $a0, $a1, $a0 ++; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 48 ++; CHECK-NEXT: ret ++ %1 = alloca %struct.large ++ call void @callee_large_struct_ret(ptr sret(%struct.large) %1) ++ %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 ++ %3 = load i64, ptr %2 ++ %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 ++ %5 = load i64, ptr %4 ++ %6 = add i64 %3, %5 ++ ret i64 %6 ++} +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll +index ae2ce7291..ceb38876c 100644 +--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll +@@ -2,406 +2,7 @@ + ; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \ + ; RUN: | FileCheck %s + +-;; Check that on LA64, i128 is passed in a pair of GPRs. +-define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind { +-; CHECK-LABEL: callee_i128_in_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %b_trunc = trunc i128 %b to i64 +- %1 = add i64 %a, %b_trunc +- ret i64 %1 +-} +- +-define i64 @caller_i128_in_regs() nounwind { +-; CHECK-LABEL: caller_i128_in_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: move $a2, $zero +-; CHECK-NEXT: bl %plt(callee_i128_in_regs) +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_i128_in_regs(i64 1, i128 2) +- ret i64 %1 +-} +- +-;; Check that the stack is used once the GPRs are exhausted. 
+-define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind { +-; CHECK-LABEL: callee_many_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $t0, $sp, 0 +-; CHECK-NEXT: xor $a5, $a5, $t0 +-; CHECK-NEXT: xor $a4, $a4, $a7 +-; CHECK-NEXT: or $a4, $a4, $a5 +-; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0 +-; CHECK-NEXT: andi $a0, $a0, 255 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: add.d $a0, $a0, $a3 +-; CHECK-NEXT: sltui $a1, $a4, 1 +-; CHECK-NEXT: add.d $a0, $a1, $a0 +-; CHECK-NEXT: add.d $a0, $a0, $a6 +-; CHECK-NEXT: ld.d $a1, $sp, 8 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %a_ext = zext i8 %a to i64 +- %b_ext = zext i16 %b to i64 +- %c_ext = zext i32 %c to i64 +- %1 = add i64 %a_ext, %b_ext +- %2 = add i64 %1, %c_ext +- %3 = add i64 %2, %d +- %4 = icmp eq i128 %e, %g +- %5 = zext i1 %4 to i64 +- %6 = add i64 %5, %3 +- %7 = add i64 %6, %f +- %8 = add i64 %7, %h +- ret i64 %8 +-} +- +-define i64 @caller_many_scalars() nounwind { +-; CHECK-LABEL: caller_many_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -32 +-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 8 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: st.d $zero, $sp, 0 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: ori $a4, $zero, 5 +-; CHECK-NEXT: ori $a6, $zero, 6 +-; CHECK-NEXT: ori $a7, $zero, 7 +-; CHECK-NEXT: move $a5, $zero +-; CHECK-NEXT: bl %plt(callee_many_scalars) +-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 32 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8) +- ret i64 %1 +-} +- +-;; Check that i256 is passed indirectly. 
+- +-define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { +-; CHECK-LABEL: callee_large_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a2, $a1, 24 +-; CHECK-NEXT: ld.d $a3, $a0, 24 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a3, $a1, 8 +-; CHECK-NEXT: ld.d $a4, $a0, 8 +-; CHECK-NEXT: xor $a3, $a4, $a3 +-; CHECK-NEXT: or $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a3, $a1, 16 +-; CHECK-NEXT: ld.d $a4, $a0, 16 +-; CHECK-NEXT: xor $a3, $a4, $a3 +-; CHECK-NEXT: ld.d $a1, $a1, 0 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: xor $a0, $a0, $a1 +-; CHECK-NEXT: or $a0, $a0, $a3 +-; CHECK-NEXT: or $a0, $a0, $a2 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ret +- %1 = icmp eq i256 %a, %b +- %2 = zext i1 %1 to i64 +- ret i64 %2 +-} +- +-define i64 @caller_large_scalars() nounwind { +-; CHECK-LABEL: caller_large_scalars: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -80 +-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.d $a0, $sp, 0 +-; CHECK-NEXT: st.d $zero, $sp, 24 +-; CHECK-NEXT: st.d $zero, $sp, 16 +-; CHECK-NEXT: st.d $zero, $sp, 8 +-; CHECK-NEXT: st.d $zero, $sp, 56 +-; CHECK-NEXT: st.d $zero, $sp, 48 +-; CHECK-NEXT: st.d $zero, $sp, 40 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a1, $sp, 0 +-; CHECK-NEXT: bl %plt(callee_large_scalars) +-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 80 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_large_scalars(i256 1, i256 2) +- ret i64 %1 +-} +- +-;; Check that arguments larger than 2*GRLen are handled correctly when their +-;; address is passed on the stack rather than in memory. +- +-;; Must keep define on a single line due to an update_llc_test_checks.py limitation +-define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind { +-; CHECK-LABEL: callee_large_scalars_exhausted_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a0, $sp, 8 +-; CHECK-NEXT: ld.d $a1, $a0, 24 +-; CHECK-NEXT: ld.d $a2, $a7, 24 +-; CHECK-NEXT: xor $a1, $a2, $a1 +-; CHECK-NEXT: ld.d $a2, $a0, 8 +-; CHECK-NEXT: ld.d $a3, $a7, 8 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: or $a1, $a2, $a1 +-; CHECK-NEXT: ld.d $a2, $a0, 16 +-; CHECK-NEXT: ld.d $a3, $a7, 16 +-; CHECK-NEXT: xor $a2, $a3, $a2 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: ld.d $a3, $a7, 0 +-; CHECK-NEXT: xor $a0, $a3, $a0 +-; CHECK-NEXT: or $a0, $a0, $a2 +-; CHECK-NEXT: or $a0, $a0, $a1 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ret +- %1 = icmp eq i256 %h, %j +- %2 = zext i1 %1 to i64 +- ret i64 %2 +-} +- +-define i64 @caller_large_scalars_exhausted_regs() nounwind { +-; CHECK-LABEL: caller_large_scalars_exhausted_regs: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -96 +-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 16 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: ori $a0, $zero, 9 +-; CHECK-NEXT: st.d $a0, $sp, 0 +-; CHECK-NEXT: ori $a0, $zero, 10 +-; CHECK-NEXT: st.d $a0, $sp, 16 +-; CHECK-NEXT: st.d $zero, $sp, 40 +-; CHECK-NEXT: st.d $zero, $sp, 32 +-; CHECK-NEXT: st.d $zero, $sp, 24 +-; CHECK-NEXT: st.d $zero, $sp, 72 +-; CHECK-NEXT: st.d $zero, $sp, 64 +-; CHECK-NEXT: st.d $zero, $sp, 56 +-; CHECK-NEXT: ori $a0, $zero, 8 +-; CHECK-NEXT: st.d $a0, $sp, 48 +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: ori $a2, $zero, 3 +-; 
CHECK-NEXT: ori $a3, $zero, 4 +-; CHECK-NEXT: ori $a4, $zero, 5 +-; CHECK-NEXT: ori $a5, $zero, 6 +-; CHECK-NEXT: ori $a6, $zero, 7 +-; CHECK-NEXT: addi.d $a7, $sp, 48 +-; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) +-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 96 +-; CHECK-NEXT: ret +- %1 = call i64 @callee_large_scalars_exhausted_regs( +- i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9, +- i256 10) +- ret i64 %1 +-} +- +-;; Check large struct arguments, which are passed byval +- +-%struct.large = type { i64, i64, i64, i64 } +- +-define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind { +-; CHECK-LABEL: callee_large_struct: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ld.d $a1, $a0, 24 +-; CHECK-NEXT: ld.d $a0, $a0, 0 +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ret +- %1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0 +- %2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3 +- %3 = load i64, ptr %1 +- %4 = load i64, ptr %2 +- %5 = add i64 %3, %4 +- ret i64 %5 +-} +- +-define i64 @caller_large_struct() nounwind { +-; CHECK-LABEL: caller_large_struct: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -80 +-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: st.d $a0, $sp, 40 +-; CHECK-NEXT: st.d $a0, $sp, 8 +-; CHECK-NEXT: ori $a0, $zero, 2 +-; CHECK-NEXT: st.d $a0, $sp, 48 +-; CHECK-NEXT: st.d $a0, $sp, 16 +-; CHECK-NEXT: ori $a0, $zero, 3 +-; CHECK-NEXT: st.d $a0, $sp, 56 +-; CHECK-NEXT: st.d $a0, $sp, 24 +-; CHECK-NEXT: ori $a0, $zero, 4 +-; CHECK-NEXT: st.d $a0, $sp, 64 +-; CHECK-NEXT: st.d $a0, $sp, 32 +-; CHECK-NEXT: addi.d $a0, $sp, 8 +-; CHECK-NEXT: bl %plt(callee_large_struct) +-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 80 +-; CHECK-NEXT: ret +- %ls = alloca %struct.large, align 8 +- %a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0 +- store i64 1, ptr %a +- %b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1 +- store i64 2, ptr %b +- %c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2 +- store i64 3, ptr %c +- %d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3 +- store i64 4, ptr %d +- %1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls) +- ret i64 %1 +-} +- +-;; Check return scalar which size is 2*GRLen. +- +-define i128 @callee_small_scalar_ret() nounwind { +-; CHECK-LABEL: callee_small_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a0, $zero, -1 +-; CHECK-NEXT: move $a1, $a0 +-; CHECK-NEXT: ret +- ret i128 -1 +-} +- +-define i64 @caller_small_scalar_ret() nounwind { +-; CHECK-LABEL: caller_small_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: bl %plt(callee_small_scalar_ret) +-; CHECK-NEXT: addi.w $a2, $zero, -2 +-; CHECK-NEXT: xor $a0, $a0, $a2 +-; CHECK-NEXT: orn $a0, $a0, $a1 +-; CHECK-NEXT: sltui $a0, $a0, 1 +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call i128 @callee_small_scalar_ret() +- %2 = icmp eq i128 -2, %1 +- %3 = zext i1 %2 to i64 +- ret i64 %3 +-} +- +-;; Check return struct which size is 2*GRLen. 
+- +-%struct.small = type { i64, ptr } +- +-define %struct.small @callee_small_struct_ret() nounwind { +-; CHECK-LABEL: callee_small_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a0, $zero, 1 +-; CHECK-NEXT: move $a1, $zero +-; CHECK-NEXT: ret +- ret %struct.small { i64 1, ptr null } +-} +- +-define i64 @caller_small_struct_ret() nounwind { +-; CHECK-LABEL: caller_small_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -16 +-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: bl %plt(callee_small_struct_ret) +-; CHECK-NEXT: add.d $a0, $a0, $a1 +-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 16 +-; CHECK-NEXT: ret +- %1 = call %struct.small @callee_small_struct_ret() +- %2 = extractvalue %struct.small %1, 0 +- %3 = extractvalue %struct.small %1, 1 +- %4 = ptrtoint ptr %3 to i64 +- %5 = add i64 %2, %4 +- ret i64 %5 +-} +- +-;; Check return scalar which size is more than 2*GRLen. +- +-define i256 @callee_large_scalar_ret() nounwind { +-; CHECK-LABEL: callee_large_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.w $a1, $zero, -1 +-; CHECK-NEXT: st.d $a1, $a0, 24 +-; CHECK-NEXT: st.d $a1, $a0, 16 +-; CHECK-NEXT: st.d $a1, $a0, 8 +-; CHECK-NEXT: lu12i.w $a1, -30141 +-; CHECK-NEXT: ori $a1, $a1, 747 +-; CHECK-NEXT: st.d $a1, $a0, 0 +-; CHECK-NEXT: ret +- ret i256 -123456789 +-} +- +-define void @caller_large_scalar_ret() nounwind { +-; CHECK-LABEL: caller_large_scalar_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -48 +-; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 0 +-; CHECK-NEXT: bl %plt(callee_large_scalar_ret) +-; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 48 +-; CHECK-NEXT: ret +- %1 = call i256 @callee_large_scalar_ret() +- ret void +-} +- +-;; Check return struct which size is more than 2*GRLen. 
+- +-define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind { +-; CHECK-LABEL: callee_large_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: ori $a1, $zero, 4 +-; CHECK-NEXT: st.w $a1, $a0, 24 +-; CHECK-NEXT: ori $a1, $zero, 3 +-; CHECK-NEXT: st.w $a1, $a0, 16 +-; CHECK-NEXT: ori $a1, $zero, 2 +-; CHECK-NEXT: st.w $a1, $a0, 8 +-; CHECK-NEXT: st.w $zero, $a0, 28 +-; CHECK-NEXT: st.w $zero, $a0, 20 +-; CHECK-NEXT: st.w $zero, $a0, 12 +-; CHECK-NEXT: st.w $zero, $a0, 4 +-; CHECK-NEXT: ori $a1, $zero, 1 +-; CHECK-NEXT: st.w $a1, $a0, 0 +-; CHECK-NEXT: ret +- %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 +- store i64 1, ptr %a, align 4 +- %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 +- store i64 2, ptr %b, align 4 +- %c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2 +- store i64 3, ptr %c, align 4 +- %d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3 +- store i64 4, ptr %d, align 4 +- ret void +-} +- +-define i64 @caller_large_struct_ret() nounwind { +-; CHECK-LABEL: caller_large_struct_ret: +-; CHECK: # %bb.0: +-; CHECK-NEXT: addi.d $sp, $sp, -48 +-; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +-; CHECK-NEXT: addi.d $a0, $sp, 8 +-; CHECK-NEXT: bl %plt(callee_large_struct_ret) +-; CHECK-NEXT: ld.d $a0, $sp, 32 +-; CHECK-NEXT: ld.d $a1, $sp, 8 +-; CHECK-NEXT: add.d $a0, $a1, $a0 +-; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +-; CHECK-NEXT: addi.d $sp, $sp, 48 +-; CHECK-NEXT: ret +- %1 = alloca %struct.large +- call void @callee_large_struct_ret(ptr sret(%struct.large) %1) +- %2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0 +- %3 = load i64, ptr %2 +- %4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3 +- %5 = load i64, ptr %4 +- %6 = add i64 %3, %5 +- ret i64 %6 +-} ++;; This file contains specific tests for the lp64d ABI. + + ;; Check pass floating-point arguments whith FPRs. 
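The hunk that follows only renumbers constant-pool labels: with the eighteen functions above moved out to calling-conv-common.ll, caller_double_in_gpr_exhausted_fprs drops from function 21 to function 3 in this file, so .LCPI21_* becomes .LCPI3_*. The test itself illustrates the lp64d rule that the first eight floating-point arguments travel in $fa0-$fa7 and the ninth falls back to a GPR, hence the movgr2fr.d in its CHECK lines. In source terms the scenario is roughly this sketch, not the test's actual source:

// Under lp64d, a..h occupy $fa0-$fa7; i arrives in a GPR and the callee
// moves it into an FPR with movgr2fr.d before using it.
double doubleInGPRExhaustedFPRs(double a, double b, double c, double d,
                                double e, double f, double g, double h,
                                double i) {
  return i;
}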
+ +@@ -462,26 +63,26 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_0) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) + ; CHECK-NEXT: fld.d $fa1, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_1) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_1) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_1) + ; CHECK-NEXT: fld.d $fa2, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_2) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_2) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_2) + ; CHECK-NEXT: fld.d $fa3, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_3) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_3) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_3) + ; CHECK-NEXT: fld.d $fa4, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_4) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_4) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_4) + ; CHECK-NEXT: fld.d $fa5, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_5) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_5) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_5) + ; CHECK-NEXT: fld.d $fa6, $a0, 0 +-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_6) +-; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI21_6) ++; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) ++; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_6) + ; CHECK-NEXT: fld.d $fa7, $a0, 0 + ; CHECK-NEXT: addi.d $a0, $zero, 1 + ; CHECK-NEXT: movgr2fr.d $fa0, $a0 +diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll +new file mode 100644 +index 000000000..d738c066e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s ++ ++;; This file contains specific tests for the lp64s ABI. 
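Under lp64s there are no floating-point argument registers at all: a float travels as its 32-bit IEEE pattern in a GPR and any arithmetic on it becomes a libcall, which is why the first test below zero-extends $a1 with bstrpick.d and then calls __fixsfdi. A source-level sketch of that first test, assuming a soft-float lp64s build (the name is illustrative):

#include <cstdint>

// With --target-abi=lp64s, b arrives in $a1 as raw bits; the float-to-i64
// conversion lowers to a call to the __fixsfdi runtime routine.
uint64_t floatInRegs(uint64_t a, float b) {
  return a + static_cast<int64_t>(b);
}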
++ ++define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { ++; CHECK-LABEL: callee_float_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill ++; CHECK-NEXT: move $fp, $a0 ++; CHECK-NEXT: bstrpick.d $a0, $a1, 31, 0 ++; CHECK-NEXT: bl %plt(__fixsfdi) ++; CHECK-NEXT: add.d $a0, $fp, $a0 ++; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %b_fptosi = fptosi float %b to i64 ++ %1 = add i64 %a, %b_fptosi ++ ret i64 %1 ++} ++ ++define i64 @caller_float_in_regs() nounwind { ++; CHECK-LABEL: caller_float_in_regs: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: lu12i.w $a1, 262144 ++; CHECK-NEXT: bl %plt(callee_float_in_regs) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_float_in_regs(i64 1, float 2.0) ++ ret i64 %1 ++} ++ ++define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e) nounwind { ++; CHECK-LABEL: callee_float_on_stack: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $a0, $sp, 0 ++; CHECK-NEXT: ret ++ %1 = trunc i128 %d to i64 ++ %2 = bitcast float %e to i32 ++ %3 = sext i32 %2 to i64 ++ %4 = add i64 %1, %3 ++ ret i64 %3 ++} ++ ++define i64 @caller_float_on_stack() nounwind { ++; CHECK-LABEL: caller_float_on_stack: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: lu12i.w $a0, 264704 ++; CHECK-NEXT: st.d $a0, $sp, 0 ++; CHECK-NEXT: ori $a0, $zero, 1 ++; CHECK-NEXT: ori $a2, $zero, 2 ++; CHECK-NEXT: ori $a4, $zero, 3 ++; CHECK-NEXT: ori $a6, $zero, 4 ++; CHECK-NEXT: move $a1, $zero ++; CHECK-NEXT: move $a3, $zero ++; CHECK-NEXT: move $a5, $zero ++; CHECK-NEXT: move $a7, $zero ++; CHECK-NEXT: bl %plt(callee_float_on_stack) ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0) ++ ret i64 %1 ++} ++ ++define float @callee_tiny_scalar_ret() nounwind { ++; CHECK-LABEL: callee_tiny_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lu12i.w $a0, 260096 ++; CHECK-NEXT: ret ++ ret float 1.0 ++} ++ ++define i64 @caller_tiny_scalar_ret() nounwind { ++; CHECK-LABEL: caller_tiny_scalar_ret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -16 ++; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; CHECK-NEXT: bl %plt(callee_tiny_scalar_ret) ++; CHECK-NEXT: addi.w $a0, $a0, 0 ++; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 16 ++; CHECK-NEXT: ret ++ %1 = call float @callee_tiny_scalar_ret() ++ %2 = bitcast float %1 to i32 ++ %3 = sext i32 %2 to i64 ++ ret i64 %3 ++} +diff --git a/llvm/test/CodeGen/LoongArch/cpus-invalid.ll b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll +new file mode 100644 +index 000000000..b5435fb90 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/cpus-invalid.ll +@@ -0,0 +1,7 @@ ++; RUN: llc < %s --mtriple=loongarch64 --mattr=+64bit --mcpu=invalidcpu 2>&1 | FileCheck %s ++ ++; CHECK: {{.*}} is not a recognized processor for this target ++ ++define void @f() { ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/cpus.ll b/llvm/test/CodeGen/LoongArch/cpus.ll 
+new file mode 100644
+index 000000000..35945ae4d
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/cpus.ll
+@@ -0,0 +1,20 @@
++;; This tests that llc accepts all valid LoongArch CPUs.
++;; Note the 'generic' names have been tested in cpu-name-generic.ll.
++
++; RUN: llc < %s --mtriple=loongarch64 --mcpu=loongarch64 2>&1 | FileCheck %s
++; RUN: llc < %s --mtriple=loongarch64 --mcpu=la464 2>&1 | FileCheck %s
++; RUN: llc < %s --mtriple=loongarch64 2>&1 | FileCheck %s
++
++; CHECK-NOT: {{.*}} is not a recognized processor for this target
++
++define void @f() {
++  ret void
++}
++
++define void @tune_cpu_loongarch64() "tune-cpu"="loongarch64" {
++  ret void
++}
++
++define void @tune_cpu_la464() "tune-cpu"="la464" {
++  ret void
++}
+diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll
+index d55b9b726..c004d1f9c 100644
+--- a/llvm/test/CodeGen/LoongArch/e_flags.ll
++++ b/llvm/test/CodeGen/LoongArch/e_flags.ll
+@@ -1,15 +1,32 @@
+ ; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32
+ ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32s -o %t-ilp32s
++; RUN: llvm-readelf -h %t-ilp32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32f -o %t-ilp32f
++; RUN: llvm-readelf -h %t-ilp32f | FileCheck %s --check-prefixes=ILP32,ABI-F --match-full-lines
++
++; RUN: llc --mtriple=loongarch32 --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d
++; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
++
+ ; RUN: llc --mtriple=loongarch64 --filetype=obj %s -o %t-la64
+ ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
+-;; Note that we have not support the -target-abi option to select specific ABI.
+-;; See comments in LoongArchELFStreamer.cpp. So here we only check the default behaviour.
+-;; After -target-abi is supported, we can add more tests.
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64s -o %t-lp64s
++; RUN: llvm-readelf -h %t-lp64s | FileCheck %s --check-prefixes=LP64,ABI-S --match-full-lines
++
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --target-abi=lp64f -o %t-lp64f
++; RUN: llvm-readelf -h %t-lp64f | FileCheck %s --check-prefixes=LP64,ABI-F --match-full-lines
++
++; RUN: llc --mtriple=loongarch64 --filetype=obj %s --mattr=+d --target-abi=lp64d -o %t-lp64d
++; RUN: llvm-readelf -h %t-lp64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
+ ; LP64: Class: ELF64
+ ; ILP32: Class: ELF32
+
++; ABI-S: Flags: 0x41, SOFT-FLOAT, OBJ-v1
++; ABI-F: Flags: 0x42, SINGLE-FLOAT, OBJ-v1
+ ; ABI-D: Flags: 0x43, DOUBLE-FLOAT, OBJ-v1
+
+ define void @foo() {
+diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+new file mode 100644
+index 000000000..fa5fccb1a
+--- /dev/null
++++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+@@ -0,0 +1,33 @@
++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
++# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
++
++## Check that an fcc register clobbered by inline asm is correctly saved and
++## restored, by verifying that a pair of pseudos (PseudoST_CFR and
++## PseudoLD_CFR) is generated before and after the INLINEASM.
++...
++--- ++name: test ++tracksRegLiveness: true ++body: | ++ bb.0.entry: ++ liveins: $f0_64, $f1_64 ++ ++ ; CHECK-LABEL: name: test ++ ; CHECK: liveins: $f0_64, $f1_64 ++ ; CHECK-NEXT: {{ $}} ++ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64 ++ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64 ++ ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]] ++ ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0) ++ ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 ++ ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) ++ ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]] ++ ; CHECK-NEXT: PseudoRET implicit killed $r4 ++ %1:fpr64 = COPY $f1_64 ++ %0:fpr64 = COPY $f0_64 ++ %2:cfr = FCMP_CLT_D %1, %0 ++ INLINEASM &"nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 ++ $r4 = COPY %2 ++ PseudoRET implicit killed $r4 ++ ++... +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll +new file mode 100644 +index 000000000..e3e23e46b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-csr-side-effects.ll +@@ -0,0 +1,47 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.csrrd.w(i32 immarg) nounwind ++declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) nounwind ++declare void @bug() ++ ++define dso_local void @foo(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: foo: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: csrrd $a0, 2 ++; CHECK-NEXT: ori $a0, $a0, 1 ++; CHECK-NEXT: csrwr $a0, 2 ++; CHECK-NEXT: .LBB0_2: # %if.end ++; CHECK-NEXT: csrrd $a0, 2 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB0_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) ++ %or = or i32 %0, 1 ++ %1 = tail call i32 @llvm.loongarch.csrwr.w(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %2 = tail call i32 @llvm.loongarch.csrrd.w(i32 2) ++ %and = and i32 %2, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll +index 882e7f693..a839ab149 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll +@@ -1,4 +1,3 @@ +-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s + ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s + +@@ -13,140 +12,140 @@ declare i32 @llvm.loongarch.csrwr.w(i32, i32 immarg) + declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32 immarg) + + define void @dbar_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.dbar' out of range ++; CHECK: 
llvm.loongarch.dbar: argument out of range. + entry: + call void @llvm.loongarch.dbar(i32 32769) + ret void + } + + define void @dbar_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.dbar' out of range ++; CHECK: llvm.loongarch.dbar: argument out of range. + entry: + call void @llvm.loongarch.dbar(i32 -1) + ret void + } + + define void @ibar_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.ibar' out of range ++; CHECK: llvm.loongarch.ibar: argument out of range. + entry: + call void @llvm.loongarch.ibar(i32 32769) + ret void + } + + define void @ibar_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.ibar' out of range ++; CHECK: llvm.loongarch.ibar: argument out of range. + entry: + call void @llvm.loongarch.ibar(i32 -1) + ret void + } + + define void @break_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.break' out of range ++; CHECK: llvm.loongarch.break: argument out of range. + entry: + call void @llvm.loongarch.break(i32 32769) + ret void + } + + define void @break_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.break' out of range ++; CHECK: llvm.loongarch.break: argument out of range. + entry: + call void @llvm.loongarch.break(i32 -1) + ret void + } + + define void @movgr2fcsr(i32 %a) nounwind { +-; CHECK: llvm.loongarch.movgr2fcsr expects basic f target feature ++; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) + ret void + } + + define void @movgr2fcsr_imm_out_of_hi_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range ++; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 32, i32 %a) + ret void + } + + define void @movgr2fcsr_imm_out_of_lo_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.movgr2fcsr' out of range ++; CHECK: llvm.loongarch.movgr2fcsr: argument out of range. + entry: + call void @llvm.loongarch.movgr2fcsr(i32 -1, i32 %a) + ret void + } + + define i32 @movfcsr2gr() nounwind { +-; CHECK: llvm.loongarch.movfcsr2gr expects basic f target feature ++; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) + ret i32 %res + } + + define i32 @movfcsr2gr_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range ++; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 32) + ret i32 %res + } + + define i32 @movfcsr2gr_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.movfcsr2gr' out of range ++; CHECK: llvm.loongarch.movfcsr2gr: argument out of range. + entry: + %res = call i32 @llvm.loongarch.movfcsr2gr(i32 -1) + ret i32 %res + } + + define void @syscall_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.syscall' out of range ++; CHECK: llvm.loongarch.syscall: argument out of range. + entry: + call void @llvm.loongarch.syscall(i32 32769) + ret void + } + + define void @syscall_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.syscall' out of range ++; CHECK: llvm.loongarch.syscall: argument out of range. + entry: + call void @llvm.loongarch.syscall(i32 -1) + ret void + } + + define i32 @csrrd_w_imm_out_of_hi_range() #0 { +-; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range ++; CHECK: llvm.loongarch.csrrd.w: argument out of range. 
+ entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 16384) + ret i32 %0 + } + + define i32 @csrrd_w_imm_out_of_lo_range() #0 { +-; CHECK: argument to 'llvm.loongarch.csrrd.w' out of range ++; CHECK: llvm.loongarch.csrrd.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrrd.w(i32 -1) + ret i32 %0 + } + + define i32 @csrwr_w_imm_out_of_hi_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range ++; CHECK: llvm.loongarch.csrwr.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 16384) + ret i32 %0 + } + + define i32 @csrwr_w_imm_out_of_lo_range(i32 %a) #0 { +-; CHECK: argument to 'llvm.loongarch.csrwr.w' out of range ++; CHECK: llvm.loongarch.csrwr.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 -1) + ret i32 %0 + } + + define i32 @csrxchg_w_imm_out_of_hi_range(i32 %a, i32 %b) #0 { +-; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range ++; CHECK: llvm.loongarch.csrxchg.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 16384) + ret i32 %0 + } + + define i32 @csrxchg_w_imm_out_of_lo_range(i32 %a, i32 %b) #0 { +-; CHECK: argument to 'llvm.loongarch.csrxchg.w' out of range ++; CHECK: llvm.loongarch.csrxchg.w: argument out of range. + entry: + %0 = call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 -1) + ret i32 %0 +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll +new file mode 100644 +index 000000000..ad78f7f53 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-iocsr-side-effects.ll +@@ -0,0 +1,180 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++declare i32 @llvm.loongarch.iocsrrd.b(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.b(i32, i32) nounwind ++declare i32 @llvm.loongarch.iocsrrd.h(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.h(i32, i32) nounwind ++declare i32 @llvm.loongarch.iocsrrd.w(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.w(i32, i32) nounwind ++declare i64 @llvm.loongarch.iocsrrd.d(i32) nounwind ++declare void @llvm.loongarch.iocsrwr.d(i64, i32) nounwind ++declare void @bug() ++ ++define dso_local void @test_b(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_b: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB0_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.b $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.b $a1, $a0 ++; CHECK-NEXT: .LBB0_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.b $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB0_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.b(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ 
ret void ++} ++ ++define dso_local void @test_h(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_h: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB1_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.h $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.h $a1, $a0 ++; CHECK-NEXT: .LBB1_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.h $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB1_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB1_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.h(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} ++ ++define dso_local void @test_w(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_w: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB2_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.w $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.w $a1, $a0 ++; CHECK-NEXT: .LBB2_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.w $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB2_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB2_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) ++ %or = or i32 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.w(i32 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 2) ++ %and = and i32 %1, 1 ++ %tobool1.not = icmp eq i32 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} ++ ++define dso_local void @test_d(i32 noundef signext %flag) nounwind { ++; CHECK-LABEL: test_d: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: beqz $a0, .LBB3_2 ++; CHECK-NEXT: # %bb.1: # %if.then ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.d $a1, $a0 ++; CHECK-NEXT: ori $a1, $a1, 1 ++; CHECK-NEXT: iocsrwr.d $a1, $a0 ++; CHECK-NEXT: .LBB3_2: # %if.end ++; CHECK-NEXT: ori $a0, $zero, 2 ++; CHECK-NEXT: iocsrrd.d $a0, $a0 ++; CHECK-NEXT: andi $a0, $a0, 1 ++; CHECK-NEXT: bnez $a0, .LBB3_4 ++; CHECK-NEXT: # %bb.3: # %if.then2 ++; CHECK-NEXT: b %plt(bug) ++; CHECK-NEXT: .LBB3_4: # %if.end3 ++; CHECK-NEXT: ret ++entry: ++ %tobool.not = icmp eq i32 %flag, 0 ++ br i1 %tobool.not, label %if.end, label %if.then ++ ++if.then: ; preds = %entry ++ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) ++ %or = or i64 %0, 1 ++ tail call void @llvm.loongarch.iocsrwr.d(i64 %or, i32 2) ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ %1 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 2) ++ %and = 
and i64 %1, 1 ++ %tobool1.not = icmp eq i64 %and, 0 ++ br i1 %tobool1.not, label %if.then2, label %if.end3 ++ ++if.then2: ; preds = %if.end ++ tail call void @bug() ++ br label %if.end3 ++ ++if.end3: ; preds = %if.then2, %if.end ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll +index c91516149..5302ba558 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la32-error.ll +@@ -20,147 +20,147 @@ declare i64 @llvm.loongarch.lddir.d(i64, i64 immarg) + declare void @llvm.loongarch.ldpte.d(i64, i64 immarg) + + define void @cacop_arg0_out_of_hi_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 32, i32 %a, i32 1024) + ret void + } + + define void @cacop_arg0_out_of_lo_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 -1, i32 %a, i32 1024) + ret void + } + + define void @cacop_arg2_out_of_hi_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4096) + ret void + } + + define void @cacop_arg2_out_of_lo_range(i32 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.w' out of range ++; CHECK: llvm.loongarch.cacop.w: argument out of range + entry: + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 -4096) + ret void + } + + define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.b.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.b.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.h.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.h.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.w.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.w.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crc.w.d.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crc.w.d.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.b.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.b.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.h.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.h.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) + ret i32 %res + } + + define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.w.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.w.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 
%b) + ret i32 %res + } + + define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { +-; CHECK: llvm.loongarch.crcc.w.d.w requires target: loongarch64 ++; CHECK: llvm.loongarch.crcc.w.d.w: requires loongarch64 + entry: + %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) + ret i32 %res + } + + define i64 @csrrd_d() { +-; CHECK: llvm.loongarch.csrrd.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrrd.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) + ret i64 %0 + } + + define i64 @csrwr_d(i64 %a) { +-; CHECK: llvm.loongarch.csrwr.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrwr.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) + ret i64 %0 + } + + define i64 @csrxchg_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.csrxchg.d requires target: loongarch64 ++; CHECK: llvm.loongarch.csrxchg.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) + ret i64 %0 + } + + define i64 @iocsrrd_d(i32 %a) { +-; CHECK: llvm.loongarch.iocsrrd.d requires target: loongarch64 ++; CHECK: llvm.loongarch.iocsrrd.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) + ret i64 %0 + } + + define void @iocsrwr_d(i64 %a, i32 signext %b) { +-; CHECK: llvm.loongarch.iocsrwr.d requires target: loongarch64 ++; CHECK: llvm.loongarch.iocsrwr.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.iocsrwr.d(i64 %a, i32 %b) + ret void + } + + define void @asrtle_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.asrtle.d requires target: loongarch64 ++; CHECK: llvm.loongarch.asrtle.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.asrtle.d(i64 %a, i64 %b) + ret void + } + + define void @asrtgt_d(i64 %a, i64 %b) { +-; CHECK: llvm.loongarch.asrtgt.d requires target: loongarch64 ++; CHECK: llvm.loongarch.asrtgt.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.asrtgt.d(i64 %a, i64 %b) + ret void + } + + define i64 @lddir_d(i64 %a) { +-; CHECK: llvm.loongarch.lddir.d requires target: loongarch64 ++; CHECK: llvm.loongarch.lddir.d: requires loongarch64 + entry: + %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) + ret i64 %0 + } + + define void @ldpte_d(i64 %a) { +-; CHECK: llvm.loongarch.ldpte.d requires target: loongarch64 ++; CHECK: llvm.loongarch.ldpte.d: requires loongarch64 + entry: + tail call void @llvm.loongarch.ldpte.d(i64 %a, i64 1) + ret void +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll +index 51f6c4453..4716d401d 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64-error.ll +@@ -8,76 +8,76 @@ declare i64 @llvm.loongarch.csrwr.d(i64, i32 immarg) + declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i32 immarg) + + define i64 @csrrd_d_imm_out_of_hi_range() nounwind { +-; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range ++; CHECK: llvm.loongarch.csrrd.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 16384) + ret i64 %0 + } + + define i64 @csrrd_d_imm_out_of_lo_range() nounwind { +-; CHECK: argument to 'llvm.loongarch.csrrd.d' out of range ++; CHECK: llvm.loongarch.csrrd.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrrd.d(i32 -1) + ret i64 %0 + } + + define i64 @csrwr_d_imm_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range ++; CHECK: llvm.loongarch.csrwr.d: 
argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 16384) + ret i64 %0 + } + + define i64 @csrwr_d_imm_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrwr.d' out of range ++; CHECK: llvm.loongarch.csrwr.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 -1) + ret i64 %0 + } + + define i64 @csrxchg_d_imm_out_of_hi_range(i64 %a, i64 %b) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range ++; CHECK: llvm.loongarch.csrxchg.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 16384) + ret i64 %0 + } + + define i64 @csrxchg_d_imm_out_of_lo_range(i64 %a, i64 %b) nounwind { +-; CHECK: argument to 'llvm.loongarch.csrxchg.d' out of range ++; CHECK: llvm.loongarch.csrxchg.d: argument out of range + entry: + %0 = call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 -1) + ret i64 %0 + } + + define void @cacop_w(i32 %a) nounwind { +-; CHECK: llvm.loongarch.cacop.w requires target: loongarch32 ++; CHECK: llvm.loongarch.cacop.w: requires loongarch32 + call void @llvm.loongarch.cacop.w(i32 1, i32 %a, i32 4) + ret void + } + + define void @cacop_arg0_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 32, i64 %a, i64 1024) + ret void + } + + define void @cacop_arg0_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 -1, i64 %a, i64 1024) + ret void + } + + define void @cacop_arg2_out_of_hi_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 4096) + ret void + } + + define void @cacop_arg2_out_of_lo_range(i64 %a) nounwind { +-; CHECK: argument to 'llvm.loongarch.cacop.d' out of range ++; CHECK: llvm.loongarch.cacop.d: argument out of range + entry: + call void @llvm.loongarch.cacop.d(i64 1, i64 %a, i64 -4096) + ret void +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll +index 7b28682b5..f0ebd8508 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic-la64.ll +@@ -29,6 +29,14 @@ define i32 @crc_w_b_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_b_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_b_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.b.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crc_w_h_w: + ; CHECK: # %bb.0: +@@ -38,6 +46,14 @@ define i32 @crc_w_h_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_h_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_h_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.h.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crc_w_w_w: + ; CHECK: # %bb.0: +@@ -47,6 +63,14 @@ define i32 @crc_w_w_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_w_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_w_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 
@llvm.loongarch.crc.w.w.w(i32 %a, i32 %b) ++ ret void ++} ++ + define void @cacop_d(i64 %a) nounwind { + ; CHECK-LABEL: cacop_d: + ; CHECK: # %bb.0: +@@ -65,6 +89,14 @@ define i32 @crc_w_d_w(i64 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crc_w_d_w_noret(i64 %a, i32 %b) nounwind { ++; CHECK-LABEL: crc_w_d_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crc.w.d.w(i64 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_b_w: + ; CHECK: # %bb.0: +@@ -74,6 +106,14 @@ define i32 @crcc_w_b_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_b_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_b_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.b.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_h_w: + ; CHECK: # %bb.0: +@@ -83,6 +123,14 @@ define i32 @crcc_w_h_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_h_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_h_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.h.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_w_w: + ; CHECK: # %bb.0: +@@ -92,6 +140,14 @@ define i32 @crcc_w_w_w(i32 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_w_w_noret(i32 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_w_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.w.w(i32 %a, i32 %b) ++ ret void ++} ++ + define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { + ; CHECK-LABEL: crcc_w_d_w: + ; CHECK: # %bb.0: +@@ -101,6 +157,14 @@ define i32 @crcc_w_d_w(i64 %a, i32 %b) nounwind { + ret i32 %res + } + ++define void @crcc_w_d_w_noret(i64 %a, i32 %b) nounwind { ++; CHECK-LABEL: crcc_w_d_w_noret: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ret ++ %res = call i32 @llvm.loongarch.crcc.w.d.w(i64 %a, i32 %b) ++ ret void ++} ++ + define i64 @csrrd_d() { + ; CHECK-LABEL: csrrd_d: + ; CHECK: # %bb.0: # %entry +@@ -111,6 +175,16 @@ entry: + ret i64 %0 + } + ++define void @csrrd_d_noret() { ++; CHECK-LABEL: csrrd_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrrd $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1) ++ ret void ++} ++ + define i64 @csrwr_d(i64 %a) { + ; CHECK-LABEL: csrwr_d: + ; CHECK: # %bb.0: # %entry +@@ -121,6 +195,17 @@ entry: + ret i64 %0 + } + ++;; Check that csrwr is emitted even if the return value of the intrinsic is not used. ++define void @csrwr_d_noret(i64 %a) { ++; CHECK-LABEL: csrwr_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrwr $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrwr.d(i64 %a, i32 1) ++ ret void ++} ++ + define i64 @csrxchg_d(i64 %a, i64 %b) { + ; CHECK-LABEL: csrxchg_d: + ; CHECK: # %bb.0: # %entry +@@ -131,6 +216,17 @@ entry: + ret i64 %0 + } + ++;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
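++;; (The CSR write is a real side effect, so the instruction has to survive
++;; dead-code elimination even though its result, the old CSR value, is dead.)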
++define void @csrxchg_d_noret(i64 %a, i64 %b) { ++; CHECK-LABEL: csrxchg_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrxchg $a0, $a1, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.csrxchg.d(i64 %a, i64 %b, i32 1) ++ ret void ++} ++ + define i64 @iocsrrd_d(i32 %a) { + ; CHECK-LABEL: iocsrrd_d: + ; CHECK: # %bb.0: # %entry +@@ -141,6 +237,16 @@ entry: + ret i64 %0 + } + ++define void @iocsrrd_d_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.d $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a) ++ ret void ++} ++ + define void @iocsrwr_d(i64 %a, i32 signext %b) { + ; CHECK-LABEL: iocsrwr_d: + ; CHECK: # %bb.0: # %entry +@@ -181,6 +287,16 @@ entry: + ret i64 %0 + } + ++define void @lddir_d_noret(i64 %a) { ++; CHECK-LABEL: lddir_d_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: lddir $a0, $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1) ++ ret void ++} ++ + define void @ldpte_d(i64 %a) { + ; CHECK-LABEL: ldpte_d: + ; CHECK: # %bb.0: # %entry +diff --git a/llvm/test/CodeGen/LoongArch/intrinsic.ll b/llvm/test/CodeGen/LoongArch/intrinsic.ll +index cfd54e17d..f49a2500a 100644 +--- a/llvm/test/CodeGen/LoongArch/intrinsic.ll ++++ b/llvm/test/CodeGen/LoongArch/intrinsic.ll +@@ -69,6 +69,17 @@ entry: + ret i32 %res + } + ++;; TODO: Optimize out `movfcsr2gr` without data-dependency. ++define void @movfcsr2gr_noret() nounwind { ++; CHECK-LABEL: movfcsr2gr_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: movfcsr2gr $a0, $fcsr1 ++; CHECK-NEXT: ret ++entry: ++ %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) ++ ret void ++} ++ + define void @syscall() nounwind { + ; CHECK-LABEL: syscall: + ; CHECK: # %bb.0: # %entry +@@ -89,6 +100,16 @@ entry: + ret i32 %0 + } + ++define void @csrrd_w_noret() { ++; CHECK-LABEL: csrrd_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrrd $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1) ++ ret void ++} ++ + define i32 @csrwr_w(i32 signext %a) { + ; CHECK-LABEL: csrwr_w: + ; CHECK: # %bb.0: # %entry +@@ -99,6 +120,17 @@ entry: + ret i32 %0 + } + ++;; Check that csrwr is emitted even if the return value of the intrinsic is not used. ++define void @csrwr_w_noret(i32 signext %a) { ++; CHECK-LABEL: csrwr_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrwr $a0, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrwr.w(i32 %a, i32 1) ++ ret void ++} ++ + define i32 @csrxchg_w(i32 signext %a, i32 signext %b) { + ; CHECK-LABEL: csrxchg_w: + ; CHECK: # %bb.0: # %entry +@@ -109,6 +141,17 @@ entry: + ret i32 %0 + } + ++;; Check that csrxchg is emitted even if the return value of the intrinsic is not used. 
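++;; As in intrinsic-la64.ll, only the CSR side effect keeps the instruction
++;; alive; contrast cpucfg_noret at the end of this file, where the
++;; side-effect-free read is deleted entirely once its result goes unused.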
++define void @csrxchg_w_noret(i32 signext %a, i32 signext %b) { ++; CHECK-LABEL: csrxchg_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: csrxchg $a0, $a1, 1 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.csrxchg.w(i32 %a, i32 %b, i32 1) ++ ret void ++} ++ + define i32 @iocsrrd_b(i32 %a) { + ; CHECK-LABEL: iocsrrd_b: + ; CHECK: # %bb.0: # %entry +@@ -139,6 +182,36 @@ entry: + ret i32 %0 + } + ++define void @iocsrrd_b_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_b_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.b $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a) ++ ret void ++} ++ ++define void @iocsrrd_h_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_h_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.h $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a) ++ ret void ++} ++ ++define void @iocsrrd_w_noret(i32 %a) { ++; CHECK-LABEL: iocsrrd_w_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: iocsrrd.w $a0, $a0 ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a) ++ ret void ++} ++ + define void @iocsrwr_b(i32 %a, i32 %b) { + ; CHECK-LABEL: iocsrwr_b: + ; CHECK: # %bb.0: # %entry +@@ -178,3 +251,12 @@ entry: + %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) + ret i32 %0 + } ++ ++define void @cpucfg_noret(i32 %a) { ++; CHECK-LABEL: cpucfg_noret: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: ret ++entry: ++ %0 = tail call i32 @llvm.loongarch.cpucfg(i32 %a) ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +index 01f96688f..3d7aa871b 100644 +--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll ++++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +@@ -315,10 +315,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -360,11 +357,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +@@ -411,10 +404,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -456,11 +446,7 @@ define double @double_fsub_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded 
Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) + ; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) + ; LA64D-NEXT: fld.d $fs0, $a0, 0 +@@ -507,10 +493,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -552,11 +535,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +@@ -604,10 +583,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { + ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill + ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill + ; LA64F-NEXT: move $fp, $a0 +-; LA64F-NEXT: ld.wu $a0, $a0, 0 +-; LA64F-NEXT: ld.wu $a1, $fp, 4 +-; LA64F-NEXT: slli.d $a1, $a1, 32 +-; LA64F-NEXT: or $a0, $a1, $a0 ++; LA64F-NEXT: ld.d $a0, $a0, 0 + ; LA64F-NEXT: ori $s0, $zero, 8 + ; LA64F-NEXT: addi.d $s1, $sp, 8 + ; LA64F-NEXT: addi.d $s2, $sp, 0 +@@ -649,11 +625,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { + ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill + ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill + ; LA64D-NEXT: move $fp, $a0 +-; LA64D-NEXT: ld.wu $a0, $a0, 0 +-; LA64D-NEXT: ld.wu $a1, $fp, 4 +-; LA64D-NEXT: slli.d $a1, $a1, 32 +-; LA64D-NEXT: or $a0, $a1, $a0 +-; LA64D-NEXT: movgr2fr.d $fa0, $a0 ++; LA64D-NEXT: fld.d $fa0, $a0, 0 + ; LA64D-NEXT: addi.d $a0, $zero, 1 + ; LA64D-NEXT: movgr2fr.d $fs0, $a0 + ; LA64D-NEXT: ori $s0, $zero, 8 +diff --git a/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll +new file mode 100644 +index 000000000..12d4bfb50 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/patchable-function-entry.ll +@@ -0,0 +1,63 @@ ++;; Test the function attribute "patchable-function-entry". ++;; Adapted from the RISCV test case. 
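++;; For "patchable-function-entry"="N", N nops are emitted at the function
++;; entry and the entry label is recorded in a __patchable_function_entries
++;; section entry (a .word on LA32, a .dword on LA64) so that tools can
++;; locate and patch the site at run time.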
++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=CHECK,LA32 ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=CHECK,LA64 ++ ++define void @f0() "patchable-function-entry"="0" { ++; CHECK-LABEL: f0: ++; CHECK-NEXT: .Lfunc_begin0: ++; CHECK-NOT: nop ++; CHECK: ret ++; CHECK-NOT: .section __patchable_function_entries ++ ret void ++} ++ ++define void @f1() "patchable-function-entry"="1" { ++; CHECK-LABEL: f1: ++; CHECK-NEXT: .Lfunc_begin1: ++; CHECK: nop ++; CHECK-NEXT: ret ++; CHECK: .section __patchable_function_entries,"awo",@progbits,f1{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Lfunc_begin1 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Lfunc_begin1 ++ ret void ++} ++ ++$f5 = comdat any ++define void @f5() "patchable-function-entry"="5" comdat { ++; CHECK-LABEL: f5: ++; CHECK-NEXT: .Lfunc_begin2: ++; CHECK-COUNT-5: nop ++; CHECK-NEXT: ret ++; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Lfunc_begin2 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Lfunc_begin2 ++ ret void ++} ++ ++;; -fpatchable-function-entry=3,2 ++;; "patchable-function-prefix" emits data before the function entry label. ++define void @f3_2() "patchable-function-entry"="1" "patchable-function-prefix"="2" { ++; CHECK-LABEL: .type f3_2,@function ++; CHECK-NEXT: .Ltmp0: # @f3_2 ++; CHECK-COUNT-2: nop ++; CHECK-NEXT: f3_2: ++; CHECK: # %bb.0: ++; CHECK-NEXT: nop ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA64-NEXT: addi.d $sp, $sp, -16 ++;; .size does not include the prefix. ++; CHECK: .Lfunc_end3: ++; CHECK-NEXT: .size f3_2, .Lfunc_end3-f3_2 ++; CHECK: .section __patchable_function_entries,"awo",@progbits,f3_2{{$}} ++; LA32: .p2align 2 ++; LA32-NEXT: .word .Ltmp0 ++; LA64: .p2align 3 ++; LA64-NEXT: .dword .Ltmp0 ++ %frame = alloca i8, i32 16 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll +index f09b49688..ff5476317 100644 +--- a/llvm/test/CodeGen/LoongArch/tail-calls.ll ++++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll +@@ -13,6 +13,7 @@ entry: + } + + ;; Perform tail call optimization for external symbol. ++;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns. + @dest = global [2 x i8] zeroinitializer + declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1) + define void @caller_extern(ptr %src) optsize { +@@ -21,10 +22,10 @@ define void @caller_extern(ptr %src) optsize { + ; CHECK-NEXT: move $a1, $a0 + ; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest) + ; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest) +-; CHECK-NEXT: ori $a2, $zero, 7 ++; CHECK-NEXT: ori $a2, $zero, 33 + ; CHECK-NEXT: b %plt(memcpy) + entry: +- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false) ++ tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false) + ret void + } + +diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +new file mode 100644 +index 000000000..1d5ed089c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +@@ -0,0 +1,74 @@ ++;; Check that an unknown --target-abi is ignored and the triple-implied ABI is ++;; used. 
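++;; The environment component of the triple (e.g. -gnusf) can itself imply
++;; an ABI; the cases below exercise how that interacts with an explicit
++;; --target-abi, including conflicting and bitness-mismatched combinations.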
++; RUN: llc --mtriple=loongarch32-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,UNKNOWN ++; RUN: llc --mtriple=loongarch64-linux-gnu --target-abi=foo --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,UNKNOWN ++ ++; UNKNOWN: 'foo' is not a recognized ABI for this target, ignoring and using triple-implied ABI ++ ++;; Check that --target-abi takes precedence over triple-supplied ABI modifiers. ++; RUN: llc --mtriple=loongarch32-linux-gnusf --target-abi=ilp32d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,CONFLICT-ILP32D ++; RUN: llc --mtriple=loongarch64-linux-gnusf --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,CONFLICT-LP64D ++ ++; CONFLICT-ILP32D: warning: triple-implied ABI conflicts with provided target-abi 'ilp32d', using target-abi ++; CONFLICT-LP64D: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi ++ ++;; Check that no warning is reported when there is no environment component in ++;; triple-supplied ABI modifiers and --target-abi is used. ++; RUN: llc --mtriple=loongarch64-linux --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,NO-WARNING ++ ++; NO-WARNING-NOT: warning: triple-implied ABI conflicts with provided target-abi 'lp64d', using target-abi ++ ++;; Check that ILP32-on-LA64 and LP64-on-LA32 combinations are handled properly. ++; RUN: llc --mtriple=loongarch64 --target-abi=ilp32d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=LP64D,32ON64 ++; RUN: llc --mtriple=loongarch32 --target-abi=lp64d --mattr=+d < %s 2>&1 \ ++; RUN: | FileCheck %s --check-prefixes=ILP32D,64ON32 ++ ++; 32ON64: 32-bit ABIs are not supported for 64-bit targets, ignoring target-abi and using triple-implied ABI ++; 64ON32: 64-bit ABIs are not supported for 32-bit targets, ignoring target-abi and using triple-implied ABI ++ ++define float @f(float %a) { ++; ILP32D-LABEL: f: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: f: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.w $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.w $fa1, $a0 ++; LP64D-NEXT: ffint.s.w $fa1, $fa1 ++; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd float %a, 1.0 ++ ret float %1 ++} ++ ++define double @g(double %a) { ++; ILP32D-LABEL: g: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 ++; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: g: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.d $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.d $fa1, $a0 ++; LP64D-NEXT: ffint.d.l $fa1, $fa1 ++; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd double %a, 1.0 ++ ret double %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +new file mode 100644 +index 000000000..0aca33903 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +@@ -0,0 +1,49 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++ ++;; Check that the correct ABI is chosen based on the triple given. ++;; TODO: enable the S and F ABIs once support is wired up. 
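++;; The gnuf64 environment implies the double-float ABIs checked below:
++;; ilp32d for loongarch32 and lp64d for loongarch64. gnuf32 and gnusf
++;; would analogously select the single-float and soft-float ABIs.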
++; RUN: llc --mtriple=loongarch32-linux-gnuf64 --mattr=+d < %s \ ++; RUN: | FileCheck %s --check-prefix=ILP32D ++; RUN: llc --mtriple=loongarch64-linux-gnuf64 --mattr=+d < %s \ ++; RUN: | FileCheck %s --check-prefix=LP64D ++ ++define float @f(float %a) { ++; ILP32D-LABEL: f: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: f: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.w $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.w $fa1, $a0 ++; LP64D-NEXT: ffint.s.w $fa1, $fa1 ++; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd float %a, 1.0 ++ ret float %1 ++} ++ ++define double @g(double %a) { ++; ILP32D-LABEL: g: ++; ILP32D: # %bb.0: ++; ILP32D-NEXT: addi.w $a0, $zero, 1 ++; ILP32D-NEXT: movgr2fr.w $fa1, $a0 ++; ILP32D-NEXT: ffint.s.w $fa1, $fa1 ++; ILP32D-NEXT: fcvt.d.s $fa1, $fa1 ++; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; ILP32D-NEXT: ret ++; ++; LP64D-LABEL: g: ++; LP64D: # %bb.0: ++; LP64D-NEXT: addi.d $a0, $zero, 1 ++; LP64D-NEXT: movgr2fr.d $fa1, $a0 ++; LP64D-NEXT: ffint.d.l $fa1, $fa1 ++; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ++; LP64D-NEXT: ret ++ %1 = fadd double %a, 1.0 ++ ret double %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll +new file mode 100644 +index 000000000..871c17f06 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll +@@ -0,0 +1,72 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++ ++;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll. ++ ++; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED ++; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED ++; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED ++ ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED ++; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED ++; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED ++ ++define i32 @f0(ptr %p) nounwind { ++; LA32-ALIGNED-LABEL: f0: ++; LA32-ALIGNED: # %bb.0: ++; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0 ++; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2 ++; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16 ++; LA32-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA32-ALIGNED-NEXT: ret ++; ++; LA32-UNALIGNED-LABEL: f0: ++; LA32-UNALIGNED: # %bb.0: ++; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0 ++; LA32-UNALIGNED-NEXT: ret ++; ++; LA64-UNALIGNED-LABEL: f0: ++; LA64-UNALIGNED: # %bb.0: ++; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0 ++; LA64-UNALIGNED-NEXT: ret ++; ++; LA64-ALIGNED-LABEL: f0: ++; LA64-ALIGNED: # %bb.0: ++; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0 ++; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2 ++; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16 ++; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA64-ALIGNED-NEXT: ret ++ %tmp = load i32, ptr %p, align 2 ++ ret i32 %tmp ++} ++ ++define i64 @f1(ptr %p) nounwind { ++; LA32-ALIGNED-LABEL: f1: ++; LA32-ALIGNED: # %bb.0: ++; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0 ++; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4 ++; LA32-ALIGNED-NEXT: move $a0, $a2 ++; LA32-ALIGNED-NEXT: ret ++; ++; LA32-UNALIGNED-LABEL: f1: ++; LA32-UNALIGNED: # %bb.0: ++; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0 ++; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4 ++; LA32-UNALIGNED-NEXT: 
move $a0, $a2 ++; LA32-UNALIGNED-NEXT: ret ++; ++; LA64-UNALIGNED-LABEL: f1: ++; LA64-UNALIGNED: # %bb.0: ++; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0 ++; LA64-UNALIGNED-NEXT: ret ++; ++; LA64-ALIGNED-LABEL: f1: ++; LA64-ALIGNED: # %bb.0: ++; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0 ++; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4 ++; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32 ++; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 ++; LA64-ALIGNED-NEXT: ret ++ %tmp = load i64, ptr %p, align 4 ++ ret i64 %tmp ++} +diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll +new file mode 100644 +index 000000000..37afe7e3e +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll +@@ -0,0 +1,97 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ++ ++;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll. ++ ++; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32 ++; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64 ++ ++;; Small (16 bytes here) unaligned memcpy() should be a function call if ++;; ual is turned off. ++define void @t0(ptr %out, ptr %in) { ++; LA32-LABEL: t0: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: addi.w $sp, $sp, -16 ++; LA32-NEXT: .cfi_def_cfa_offset 16 ++; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ++; LA32-NEXT: .cfi_offset 1, -4 ++; LA32-NEXT: ori $a2, $zero, 16 ++; LA32-NEXT: bl %plt(memcpy) ++; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ++; LA32-NEXT: addi.w $sp, $sp, 16 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t0: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: addi.d $sp, $sp, -16 ++; LA64-NEXT: .cfi_def_cfa_offset 16 ++; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ++; LA64-NEXT: .cfi_offset 1, -8 ++; LA64-NEXT: ori $a2, $zero, 16 ++; LA64-NEXT: bl %plt(memcpy) ++; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ++; LA64-NEXT: addi.d $sp, $sp, 16 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false) ++ ret void ++} ++ ++;; Small (16 bytes here) aligned memcpy() should be inlined even if ++;; ual is turned off. ++define void @t1(ptr align 8 %out, ptr align 8 %in) { ++; LA32-LABEL: t1: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: ld.w $a2, $a1, 12 ++; LA32-NEXT: st.w $a2, $a0, 12 ++; LA32-NEXT: ld.w $a2, $a1, 8 ++; LA32-NEXT: st.w $a2, $a0, 8 ++; LA32-NEXT: ld.w $a2, $a1, 4 ++; LA32-NEXT: st.w $a2, $a0, 4 ++; LA32-NEXT: ld.w $a1, $a1, 0 ++; LA32-NEXT: st.w $a1, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t1: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: ld.d $a2, $a1, 8 ++; LA64-NEXT: st.d $a2, $a0, 8 ++; LA64-NEXT: ld.d $a1, $a1, 0 ++; LA64-NEXT: st.d $a1, $a0, 0 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false) ++ ret void ++} ++ ++;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized ++;; loads and stores if ual is turned off. 
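++;; (Byte accesses are always naturally aligned, so this inlining stays
++;; legal without ual; the only cost is more instructions.)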
++define void @t2(ptr %out, ptr %in) { ++; LA32-LABEL: t2: ++; LA32: # %bb.0: # %entry ++; LA32-NEXT: ld.b $a2, $a1, 3 ++; LA32-NEXT: st.b $a2, $a0, 3 ++; LA32-NEXT: ld.b $a2, $a1, 2 ++; LA32-NEXT: st.b $a2, $a0, 2 ++; LA32-NEXT: ld.b $a2, $a1, 1 ++; LA32-NEXT: st.b $a2, $a0, 1 ++; LA32-NEXT: ld.b $a1, $a1, 0 ++; LA32-NEXT: st.b $a1, $a0, 0 ++; LA32-NEXT: ret ++; ++; LA64-LABEL: t2: ++; LA64: # %bb.0: # %entry ++; LA64-NEXT: ld.b $a2, $a1, 3 ++; LA64-NEXT: st.b $a2, $a0, 3 ++; LA64-NEXT: ld.b $a2, $a1, 2 ++; LA64-NEXT: st.b $a2, $a0, 2 ++; LA64-NEXT: ld.b $a2, $a1, 1 ++; LA64-NEXT: st.b $a2, $a0, 1 ++; LA64-NEXT: ld.b $a1, $a1, 0 ++; LA64-NEXT: st.b $a1, $a0, 0 ++; LA64-NEXT: ret ++entry: ++ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false) ++ ret void ++} ++ ++declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +new file mode 100644 +index 000000000..8ff055f13 +--- /dev/null ++++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll +@@ -0,0 +1,39 @@ ++; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s ++ ++;; Check this won't result in crash. ++define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { ++ %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) ++ %6 = add nsw <8 x i32> %1, ++ call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 ++ %7 = bitcast i8 %2 to <8 x i1> ++ %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 ++ %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) ++ %10 = bitcast i8 %3 to <8 x i1> ++ %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> ++ ret <8 x i32> %11 ++} ++ ++declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) ++declare void @llvm.dbg.value(metadata, metadata, metadata) ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) ++!1 = !DIFile(filename: "a.cpp", directory: "/") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) ++!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) ++!6 = !DINamespace(name: "ns1", scope: null) ++!7 = !DISubroutineType(types: !8) ++!8 = !{null} ++!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) ++!10 = !{!4} ++!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) ++!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) ++!13 = !{!14} ++!14 = !DISubrange(count: 4) ++!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) ++!16 = !DILocation(line: 18, scope: !17) ++!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +diff --git a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll +index 4b603cd29..51a5905fe 100644 +--- a/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll ++++ 
b/llvm/test/ExecutionEngine/OrcLazy/emulated-tls.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support emulated tls. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: not lli -no-process-syms -emulated-tls -jit-kind=orc-lazy %s 2>&1 \ + ; RUN: | FileCheck %s + ; +diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +index 4161b4f3c..3a3d23f2b 100644 +--- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +@@ -1,6 +1,8 @@ + import sys + +-if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', 'mips', 'mipsel', 'mips64', 'mips64el']: ++if config.root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', ++ 'mips', 'mipsel', 'mips64', 'mips64el', ++ 'loongarch64']: + config.unsupported = True + + # FIXME: These tests don't pass with the COFF rtld. +diff --git a/llvm/test/ExecutionEngine/frem.ll b/llvm/test/ExecutionEngine/frem.ll +index b8739c249..d33e4fca8 100644 +--- a/llvm/test/ExecutionEngine/frem.ll ++++ b/llvm/test/ExecutionEngine/frem.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; LLI.exe used to crash on Windows\X86 when certain single precession + ; floating point intrinsics (defined as macros) are used. + ; This unit test guards against the failure. +diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg +index e71e7cf3c..b00ef0dcb 100644 +--- a/llvm/test/ExecutionEngine/lit.local.cfg ++++ b/llvm/test/ExecutionEngine/lit.local.cfg +@@ -1,4 +1,4 @@ +-if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV', 'LoongArch']: ++if config.root.native_target in ['Sparc', 'PowerPC', 'SystemZ', 'Hexagon', 'RISCV']: + config.unsupported = True + + # ExecutionEngine tests are not expected to pass in a cross-compilation setup. +diff --git a/llvm/test/ExecutionEngine/mov64zext32.ll b/llvm/test/ExecutionEngine/mov64zext32.ll +index bba1a1987..43bd0fb2f 100644 +--- a/llvm/test/ExecutionEngine/mov64zext32.ll ++++ b/llvm/test/ExecutionEngine/mov64zext32.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll +index 6f784265a..99d95791c 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_float.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll +index 6896af83c..2e5592d4d 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-arithm_int.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. 
++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll +index f654120ea..1e11659b1 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-logical.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-logical.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll +index 84bdec1cf..e919550de 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-fp.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll +index 5a20fc4f1..9862d6af1 100644 +--- a/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll ++++ b/llvm/test/ExecutionEngine/test-interp-vec-setcond-int.ll +@@ -1,3 +1,6 @@ ++; LoongArch does not support mcjit. ++; UNSUPPORTED: target=loongarch{{.*}} ++ + ; RUN: %lli -jit-kind=mcjit %s > /dev/null + ; RUN: %lli %s > /dev/null + +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +new file mode 100644 +index 000000000..8a4ab5958 +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +@@ -0,0 +1,78 @@ ++; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++;; First, check allocation of the save area. ++declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 ++declare void @llvm.va_start(ptr) #2 ++declare void @llvm.va_end(ptr) #2 ++declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 ++define i32 @foo(i32 %guard, ...) { ++; CHECK-LABEL: @foo ++; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls ++; CHECK: [[TMP2:%.*]] = add i64 0, [[TMP1]] ++; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP2]] ++; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP2]], i1 false) ++; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 800) ++; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) ++; ++ %vl = alloca ptr, align 8 ++ call void @llvm.lifetime.start.p0(i64 32, ptr %vl) ++ call void @llvm.va_start(ptr %vl) ++ call void @llvm.va_end(ptr %vl) ++ call void @llvm.lifetime.end.p0(i64 32, ptr %vl) ++ ret i32 0 ++} ++ ++;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ++;; array. ++define i32 @bar() { ++; CHECK-LABEL: @bar ++; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 ++; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, ...) 
@foo(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Check multiple fixed arguments. ++declare i32 @foo2(i32 %g1, i32 %g2, ...) ++define i32 @bar2() { ++; CHECK-LABEL: @bar2 ++; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 ++; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 ++; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ++; ++ %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ++ ret i32 %1 ++} ++ ++;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ++;; passed to a variadic function. ++declare i64 @sum(i64 %n, ...) ++define dso_local i64 @many_args() { ++;; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. ++; CHECK-LABEL: @many_args ++; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) ++; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) ++; ++entry: ++ %ret = call i64 (i64, ...) @sum(i64 120, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, ++ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 ++ ) ++ ret i64 %ret ++} +diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll +new file mode 100644 +index 000000000..dcbe2a242 +--- /dev/null ++++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg.ll +@@ -0,0 +1,14 @@ ++; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 ++; Test that code using va_start can be compiled on LoongArch. ++ ++target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" ++target triple = "loongarch64-unknown-linux-gnu" ++ ++define void @VaStart(ptr %s, ...) { ++entry: ++ %vl = alloca ptr, align 4 ++ call void @llvm.va_start(ptr %vl) ++ ret void ++} ++ ++declare void @llvm.va_start(ptr) +diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +new file mode 100644 +index 000000000..0179e1027 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +@@ -0,0 +1,28 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s ++ ++## Check that subtraction expressions emit R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. ++ ++## TODO: 1- or 2-byte data relocations are not supported for now. ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.data { ++# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 ++# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 ++# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 ++# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 ++# CHECK-NEXT: } ++ ++.section sx,"a" ++x: ++nop ++ ++.data ++.8byte x-. ++.8byte y-. 
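++## The 4-byte differences below should produce the R_LARCH_32_PCREL pairs
++## checked above, mirroring R_LARCH_64_PCREL for the 8-byte data.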
++.4byte x-. ++.4byte y-. ++ ++.section sy,"a" ++y: ++nop +diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +index 78fc14355..fc5856691 100644 +--- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test ++++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +@@ -42,6 +42,12 @@ + # RUN: llvm-objcopy -I binary -O elf32-hexagon %t.txt %t.hexagon.o + # RUN: llvm-readobj --file-headers %t.hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32 + ++# RUN: llvm-objcopy -I binary -O elf32-loongarch %t.txt %t.la32.o ++# RUN: llvm-readobj --file-headers %t.la32.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32 ++ ++# RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o ++# RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64 ++ + # CHECK: Format: + # 32-SAME: elf32- + # 64-SAME: elf64- +@@ -49,6 +55,8 @@ + # ARM-SAME: littlearm + # HEXAGON-SAME: hexagon + # I386-SAME: i386 ++# LA32-SAME: loongarch{{$}} ++# LA64-SAME: loongarch{{$}} + # MIPS-SAME: mips{{$}} + # RISCV32-SAME: riscv{{$}} + # RISCV64-SAME: riscv{{$}} +@@ -62,6 +70,8 @@ + # ARM-NEXT: Arch: arm + # HEXAGON-NEXT: Arch: hexagon + # I386-NEXT: Arch: i386 ++# LA32-NEXT: Arch: loongarch32 ++# LA64-NEXT: Arch: loongarch64 + # MIPS-NEXT: Arch: mips{{$}} + # PPC32BE-NEXT: Arch: powerpc{{$}} + # PPC32LE-NEXT: Arch: powerpcle{{$}} +@@ -97,6 +107,8 @@ + # ARM-NEXT: Machine: EM_ARM (0x28) + # HEXAGON-NEXT: Machine: EM_HEXAGON (0xA4) + # I386-NEXT: Machine: EM_386 (0x3) ++# LA32-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA64-NEXT: Machine: EM_LOONGARCH (0x102) + # MIPS-NEXT: Machine: EM_MIPS (0x8) + # PPC32-NEXT: Machine: EM_PPC (0x14) + # PPC64-NEXT: Machine: EM_PPC64 (0x15) +diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +index 98f1b3c64..882940c05 100644 +--- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test ++++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +@@ -109,6 +109,14 @@ + # RUN: llvm-readobj --file-headers %t.elf32_hexagon.o | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV + # RUN: llvm-readobj --file-headers %t.elf32_hexagon.dwo | FileCheck %s --check-prefixes=CHECK,LE,HEXAGON,32,SYSV + ++# RUN: llvm-objcopy %t.o -O elf32-loongarch %t.elf32_loongarch.o --split-dwo=%t.elf32_loongarch.dwo ++# RUN: llvm-readobj --file-headers %t.elf32_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV ++# RUN: llvm-readobj --file-headers %t.elf32_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA32,32,SYSV ++ ++# RUN: llvm-objcopy %t.o -O elf64-loongarch %t.elf64_loongarch.o --split-dwo=%t.elf64_loongarch.dwo ++# RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV ++# RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV ++ + !ELF + FileHeader: + Class: ELFCLASS32 +@@ -144,6 +152,8 @@ Symbols: + # AARCH-SAME: aarch64 + # ARM-SAME: littlearm + # HEXAGON-SAME: hexagon ++# LA32-SAME: loongarch{{$}} ++# LA64-SAME: loongarch{{$}} + # MIPS-SAME: mips + # PPCBE-SAME: powerpc{{$}} + # PPCLE-SAME: powerpcle{{$}} +@@ -158,6 +168,8 @@ Symbols: + # AARCH-NEXT: Arch: aarch64 + # ARM-NEXT: Arch: arm + # HEXAGON-NEXT: Arch: hexagon ++# LA32-NEXT: Arch: loongarch32 ++# LA64-NEXT: Arch: loongarch64 + # MIPSBE-NEXT: Arch: mips{{$}} + # MIPSLE-NEXT: Arch: mipsel{{$}} + # 
MIPS64BE-NEXT: Arch: mips64{{$}} +@@ -190,6 +202,8 @@ Symbols: + # HEXAGON: Machine: EM_HEXAGON (0xA4) + # I386: Machine: EM_386 (0x3) + # IAMCU: Machine: EM_IAMCU (0x6) ++# LA32: Machine: EM_LOONGARCH (0x102) ++# LA64: Machine: EM_LOONGARCH (0x102) + # MIPS: Machine: EM_MIPS (0x8) + # PPC32: Machine: EM_PPC (0x14) + # PPC64: Machine: EM_PPC64 (0x15) +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index c26fae7e8..e32dc893f 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -93,6 +93,15 @@ + # CHECK: Type: R_LARCH_TLS_GD_HI20 (98) + # CHECK: Type: R_LARCH_32_PCREL (99) + # CHECK: Type: R_LARCH_RELAX (100) ++# CHECK: Type: R_LARCH_DELETE (101) ++# CHECK: Type: R_LARCH_ALIGN (102) ++# CHECK: Type: R_LARCH_PCREL20_S2 (103) ++# CHECK: Type: R_LARCH_CFA (104) ++# CHECK: Type: R_LARCH_ADD6 (105) ++# CHECK: Type: R_LARCH_SUB6 (106) ++# CHECK: Type: R_LARCH_ADD_ULEB128 (107) ++# CHECK: Type: R_LARCH_SUB_ULEB128 (108) ++# CHECK: Type: R_LARCH_64_PCREL (109) + + --- !ELF + FileHeader: +@@ -193,3 +202,12 @@ Sections: + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX ++ - Type: R_LARCH_DELETE ++ - Type: R_LARCH_ALIGN ++ - Type: R_LARCH_PCREL20_S2 ++ - Type: R_LARCH_CFA ++ - Type: R_LARCH_ADD6 ++ - Type: R_LARCH_SUB6 ++ - Type: R_LARCH_ADD_ULEB128 ++ - Type: R_LARCH_SUB_ULEB128 ++ - Type: R_LARCH_64_PCREL +diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +index 577b83732..42bd8371d 100644 +--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp ++++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +@@ -331,7 +331,11 @@ static const StringMap TargetMap{ + // SPARC + {"elf32-sparc", {ELF::EM_SPARC, false, false}}, + {"elf32-sparcel", {ELF::EM_SPARC, false, true}}, ++ // Hexagon + {"elf32-hexagon", {ELF::EM_HEXAGON, false, true}}, ++ // LoongArch ++ {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, ++ {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, + }; + + static Expected +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 9cf8feb0e..35fc2ec69 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -233,6 +233,24 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); + EXPECT_EQ("R_LARCH_RELAX", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); ++ EXPECT_EQ("R_LARCH_DELETE", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); ++ EXPECT_EQ("R_LARCH_ALIGN", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); ++ EXPECT_EQ("R_LARCH_PCREL20_S2", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); ++ EXPECT_EQ("R_LARCH_CFA", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); ++ EXPECT_EQ("R_LARCH_ADD6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); ++ EXPECT_EQ("R_LARCH_SUB6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); ++ EXPECT_EQ("R_LARCH_ADD_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); ++ EXPECT_EQ("R_LARCH_SUB_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); ++ EXPECT_EQ("R_LARCH_64_PCREL", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { +diff --git a/llvm/unittests/TargetParser/TripleTest.cpp 
b/llvm/unittests/TargetParser/TripleTest.cpp +index 77de43a16..b19699fc0 100644 +--- a/llvm/unittests/TargetParser/TripleTest.cpp ++++ b/llvm/unittests/TargetParser/TripleTest.cpp +@@ -1225,12 +1225,14 @@ TEST(TripleTest, BitWidthPredicates) { + EXPECT_TRUE(T.isArch32Bit()); + EXPECT_FALSE(T.isArch64Bit()); + EXPECT_TRUE(T.isLoongArch()); ++ EXPECT_TRUE(T.isLoongArch32()); + + T.setArch(Triple::loongarch64); + EXPECT_FALSE(T.isArch16Bit()); + EXPECT_FALSE(T.isArch32Bit()); + EXPECT_TRUE(T.isArch64Bit()); + EXPECT_TRUE(T.isLoongArch()); ++ EXPECT_TRUE(T.isLoongArch64()); + + T.setArch(Triple::dxil); + EXPECT_FALSE(T.isArch16Bit()); diff --git a/llvm.spec b/llvm.spec index d1913ac638d56a0df0d22601f4a6643a6d2073a4..d810e2f9e30bee42b602e7215da5edf275a47c66 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,10 +1,10 @@ -%define anolis_release 1 +%define anolis_release 2 %global toolchain clang %undefine _include_frame_pointers -%define gold_arches x86_64 aarch64 loongarch64 +%define gold_arches x86_64 aarch64 loongarch64 %ifarch %{gold_arches} %bcond_without gold %else @@ -64,6 +64,7 @@ Source2: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ve Source4: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{maj_ver}.%{min_ver}.%{patch_ver}/%{third_party_srcdir}.tar.xz Patch2: 0001-llvm-Add-install-targets-for-gtest.patch +Patch3: 0001-backport-LoongArch-patches.patch Patch201: 0201-third-party-Add-install-targets-for-gtest.patch BuildRequires: gcc gcc-c++ clang cmake ninja-build zlib-devel libffi-devel @@ -484,6 +485,9 @@ fi %endif %changelog +* Thu Aug 17 2023 Chen Li - 16.0.6-2 +- Backport LoongArch patches from llvmorg-17.0.0-rc2 + * Sun Jul 9 2023 Funda Wang - 16.0.6-1 - New version 16.0.6