diff --git a/0001-add-loongarch64-support-for-ucx.patch b/0001-add-loongarch64-support-for-ucx.patch index e3389c594291e07f23cc7671f0d24237e8b2ffc2..7d38926229827caed610aeb97b8dfeadf4c1fc4d 100644 --- a/0001-add-loongarch64-support-for-ucx.patch +++ b/0001-add-loongarch64-support-for-ucx.patch @@ -1,178 +1,101 @@ -From d81ecac7bb0f783c294a647912a8716753f08d71 Mon Sep 17 00:00:00 2001 -From: Min Zhou -From: Wenlong Zhang -Date: Wed, 11 Oct 2023 16:44:41 +0800 -Subject: [PATCH] add loongarch64 support for ucx +From b39c300cb123ae6cba77fe5fdc598b794ed276ca Mon Sep 17 00:00:00 2001 +From: Your Name +Date: Fri, 21 Mar 2025 02:53:23 +0000 +Subject: [PATCH] Add support for loongarch64 --- - config.guess | 3 + - src/ucm/Makefile.am | 2 + - src/ucm/Makefile.in | 19 ++++- - src/ucm/bistro/bistro.c | 2 +- + src/ucm/Makefile.am | 6 +- + src/ucm/bistro/bistro.c | 3 +- src/ucm/bistro/bistro.h | 2 + - src/ucm/bistro/bistro_loongarch64.c | 81 ++++++++++++++++++++ - src/ucm/bistro/bistro_loongarch64.h | 46 +++++++++++ - src/ucs/Makefile.am | 4 + - src/ucs/Makefile.in | 33 +++++++- + src/ucm/bistro/bistro_loongarch64.c | 109 +++++++++++++++++++++++ + src/ucm/bistro/bistro_loongarch64.h | 59 +++++++++++++ + src/ucs/Makefile.am | 5 ++ src/ucs/arch/atomic.h | 2 + src/ucs/arch/bitops.h | 2 + - src/ucs/arch/cpu.h | 2 + + src/ucs/arch/cpu.c | 6 ++ + src/ucs/arch/cpu.h | 4 + src/ucs/arch/global_opts.h | 2 + - src/ucs/arch/loongarch64/bitops.h | 42 ++++++++++ - src/ucs/arch/loongarch64/cpu.h | 101 +++++++++++++++++++++++++ + src/ucs/arch/loongarch64/bitops.h | 34 ++++++++ + src/ucs/arch/loongarch64/cpu.c | 21 +++++ + src/ucs/arch/loongarch64/cpu.h | 115 +++++++++++++++++++++++++ src/ucs/arch/loongarch64/global_opts.c | 24 ++++++ - src/ucs/arch/loongarch64/global_opts.h | 24 ++++++ - 17 files changed, 384 insertions(+), 7 deletions(-) + src/ucs/arch/loongarch64/global_opts.h | 26 ++++++ + src/ucs/configure.m4 | 3 +- + 17 files changed, 419 insertions(+), 4 deletions(-) create mode 100644 src/ucm/bistro/bistro_loongarch64.c create mode 100644 src/ucm/bistro/bistro_loongarch64.h create mode 100644 src/ucs/arch/loongarch64/bitops.h + create mode 100644 src/ucs/arch/loongarch64/cpu.c create mode 100644 src/ucs/arch/loongarch64/cpu.h create mode 100644 src/ucs/arch/loongarch64/global_opts.c create mode 100644 src/ucs/arch/loongarch64/global_opts.h -diff --git a/config.guess b/config.guess -index b79252d..87fc200 100755 ---- a/config.guess -+++ b/config.guess -@@ -883,6 +883,9 @@ EOF - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit ;; -+ loongarch64:Linux:*:*) -+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} -+ exit ;; - aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; diff --git a/src/ucm/Makefile.am b/src/ucm/Makefile.am -index 5140b5a..e2f53b7 100644 +index fa7a722..f39b972 100644 --- a/src/ucm/Makefile.am +++ b/src/ucm/Makefile.am -@@ -31,6 +31,7 @@ noinst_HEADERS = \ - bistro/bistro.h \ +@@ -33,7 +33,8 @@ noinst_HEADERS = \ bistro/bistro_x86_64.h \ bistro/bistro_aarch64.h \ -+ bistro/bistro_loongarch64.h \ - bistro/bistro_ppc64.h + bistro/bistro_ppc64.h \ +- bistro/bistro_rv64.h ++ bistro/bistro_rv64.h \ ++ bistro/bistro_loongarch64.h libucm_la_SOURCES = \ -@@ -44,6 +45,7 @@ libucm_la_SOURCES = \ - bistro/bistro.c \ + event/event.c \ +@@ -47,7 +48,8 @@ libucm_la_SOURCES = \ bistro/bistro_x86_64.c \ bistro/bistro_aarch64.c \ -+ bistro/bistro_loongarch64.c \ - bistro/bistro_ppc64.c + bistro/bistro_ppc64.c \ +- bistro/bistro_rv64.c ++ bistro/bistro_rv64.c \ ++ bistro/bistro_loongarch64.c if HAVE_UCM_PTMALLOC286 -diff --git a/src/ucm/Makefile.in b/src/ucm/Makefile.in -index b83d07b..3202d87 100644 ---- a/src/ucm/Makefile.in -+++ b/src/ucm/Makefile.in -@@ -192,7 +192,7 @@ am__libucm_la_SOURCES_DIST = event/event.c malloc/malloc_hook.c \ - mmap/install.c util/replace.c util/log.c util/reloc.c \ - util/sys.c bistro/bistro.c bistro/bistro_x86_64.c \ - bistro/bistro_aarch64.c bistro/bistro_ppc64.c \ -- ptmalloc286/malloc.c -+ bistro/bistro_loongarch64.c ptmalloc286/malloc.c - am__dirstamp = $(am__leading_dot)dirstamp - @HAVE_UCM_PTMALLOC286_TRUE@am__objects_1 = \ - @HAVE_UCM_PTMALLOC286_TRUE@ ptmalloc286/libucm_la-malloc.lo -@@ -202,6 +202,7 @@ am_libucm_la_OBJECTS = event/libucm_la-event.lo \ - util/libucm_la-reloc.lo util/libucm_la-sys.lo \ - bistro/libucm_la-bistro.lo bistro/libucm_la-bistro_x86_64.lo \ - bistro/libucm_la-bistro_aarch64.lo \ -+ bistro/libucm_la-bistro_loongarch64.lo \ - bistro/libucm_la-bistro_ppc64.lo $(am__objects_1) - libucm_la_OBJECTS = $(am_libucm_la_OBJECTS) - AM_V_lt = $(am__v_lt_@AM_V@) -@@ -265,7 +266,7 @@ am__noinst_HEADERS_DIST = event/event.h malloc/malloc_hook.h \ - util/replace.h util/log.h util/reloc.h util/sys.h \ - bistro/bistro_int.h bistro/bistro.h bistro/bistro_x86_64.h \ - bistro/bistro_aarch64.h bistro/bistro_ppc64.h \ -- ptmalloc286/malloc-2.8.6.h -+ bistro/bistro_loongarch64.h ptmalloc286/malloc-2.8.6.h - HEADERS = $(nobase_dist_libucm_la_HEADERS) $(noinst_HEADERS) - RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ - distclean-recursive maintainer-clean-recursive -@@ -568,11 +569,11 @@ noinst_HEADERS = event/event.h malloc/malloc_hook.h malloc/allocator.h \ - mmap/mmap.h util/khash_safe.h util/replace.h util/log.h \ - util/reloc.h util/sys.h bistro/bistro_int.h bistro/bistro.h \ - bistro/bistro_x86_64.h bistro/bistro_aarch64.h \ -- bistro/bistro_ppc64.h $(am__append_3) -+ bistro/bistro_ppc64.h bistro/bistro_loongarch64.h $(am__append_3) - libucm_la_SOURCES = event/event.c malloc/malloc_hook.c mmap/install.c \ - util/replace.c util/log.c util/reloc.c util/sys.c \ - bistro/bistro.c bistro/bistro_x86_64.c bistro/bistro_aarch64.c \ -- bistro/bistro_ppc64.c $(am__append_2) -+ bistro/bistro_ppc64.c bistro/bistro_loongarch64.c $(am__append_2) - all: all-recursive - - .SUFFIXES: -@@ -692,6 +693,8 @@ bistro/libucm_la-bistro_x86_64.lo: bistro/$(am__dirstamp) \ - bistro/$(DEPDIR)/$(am__dirstamp) - bistro/libucm_la-bistro_aarch64.lo: bistro/$(am__dirstamp) \ - bistro/$(DEPDIR)/$(am__dirstamp) -+bistro/libucm_la-bistro_loongarch64.lo: bistro/$(am__dirstamp) \ -+ bistro/$(DEPDIR)/$(am__dirstamp) - bistro/libucm_la-bistro_ppc64.lo: bistro/$(am__dirstamp) \ - bistro/$(DEPDIR)/$(am__dirstamp) - ptmalloc286/$(am__dirstamp): -@@ -726,6 +729,7 @@ distclean-compile: - - @AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_loongarch64.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@event/$(DEPDIR)/libucm_la-event.Plo@am__quote@ -@@ -831,6 +835,13 @@ bistro/libucm_la-bistro_aarch64.lo: bistro/bistro_aarch64.c - @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ - @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro_aarch64.lo `test -f 'bistro/bistro_aarch64.c' || echo '$(srcdir)/'`bistro/bistro_aarch64.c - -+bistro/libucm_la-bistro_loongarch64.lo: bistro/bistro_loongarch64.c -+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro_loongarch64.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro_loongarch64.Tpo -c -o bistro/libucm_la-bistro_loongarch64.lo `test -f 'bistro/bistro_loongarch64.c' || echo '$(srcdir)/'`bistro/bistro_loongarch64.c -+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro_loongarch64.Tpo bistro/$(DEPDIR)/libucm_la-bistro_loongarch64.Plo -+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bistro/bistro_loongarch64.c' object='bistro/libucm_la-bistro_loongarch64.lo' libtool=yes @AMDEPBACKSLASH@ -+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro_loongarch64.lo `test -f 'bistro/bistro_loongarch64.c' || echo '$(srcdir)/'`bistro/bistro_loongarch64.c -+ - bistro/libucm_la-bistro_ppc64.lo: bistro/bistro_ppc64.c - @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro_ppc64.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Tpo -c -o bistro/libucm_la-bistro_ppc64.lo `test -f 'bistro/bistro_ppc64.c' || echo '$(srcdir)/'`bistro/bistro_ppc64.c - @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Tpo bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo + libucm_la_CPPFLAGS += \ diff --git a/src/ucm/bistro/bistro.c b/src/ucm/bistro/bistro.c -index c31741a..ca8fb8a 100644 +index bd70919..d4fbd0f 100644 --- a/src/ucm/bistro/bistro.c +++ b/src/ucm/bistro/bistro.c -@@ -62,7 +62,7 @@ ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len) +@@ -1,6 +1,7 @@ + /** + * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2018. ALL RIGHTS RESERVED. + * Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++ * Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ +@@ -121,7 +122,7 @@ ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len) return status; } --#if defined(__x86_64__) || defined (__aarch64__) -+#if defined(__x86_64__) || defined (__aarch64__) || defined (__loongarch__) +-#if defined(__x86_64__) || defined (__aarch64__) || defined (__riscv) ++#if defined(__x86_64__) || defined (__aarch64__) || defined (__riscv) || defined(__loongarch64) struct ucm_bistro_restore_point { void *addr; /* address of function to restore */ size_t patch_len; /* patch length */ diff --git a/src/ucm/bistro/bistro.h b/src/ucm/bistro/bistro.h -index 1010004..f4d1662 100644 +index 8d0b907..0bb961d 100644 --- a/src/ucm/bistro/bistro.h +++ b/src/ucm/bistro/bistro.h -@@ -20,6 +20,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t; - # include "bistro_aarch64.h" - #elif defined(__x86_64__) +@@ -23,6 +23,8 @@ typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t; # include "bistro_x86_64.h" -+#elif defined(__loongarch__) + #elif defined(__riscv) + # include "bistro_rv64.h" ++#elif defined(__loongarch64) +# include "bistro_loongarch64.h" #else # error "Unsupported architecture" #endif diff --git a/src/ucm/bistro/bistro_loongarch64.c b/src/ucm/bistro/bistro_loongarch64.c new file mode 100644 -index 0000000..0e1fbff +index 0000000..752362d --- /dev/null +++ b/src/ucm/bistro/bistro_loongarch64.c -@@ -0,0 +1,81 @@ +@@ -0,0 +1,109 @@ +/** -+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. ++ * Copyright (C) Xing Li, Dandan Zhang, 2024. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ @@ -181,63 +104,86 @@ index 0000000..0e1fbff +# include "config.h" +#endif + -+/* ******************************************************* -+ * LOONGARCH64 processors family * -+ * ***************************************************** */ -+#if defined(__loongarch__) -+ -+#include -+#include -+#include -+#include ++#if defined(__loongarch64) + ++#include +#include +#include -+#include -+#include -+#include +#include ++#include ++#include + ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* Register number used to store indirect jump address. -+ * r20 is the highest numbered temporary register, assuming this one is safe -+ * to use. */ -+#define R20 20 ++#define T0 12 ++#define T2 14 ++#define RA 1 ++#define ZERO 0 + +/** -+ * @brief Generate a mov immediate instruction -+ * -+ * @param[in] _rd, _rj register number (0-31) -+ * @param[in] _si20, _si20 signed mmediate value 20, 12 bits -+ * @param[in] _ui12 immediate value -+ */ -+#define LU12I_W(_rd, _si20) \ -+ ((10 << 25) + (((_si20) & 0xfffff) << 5) + (_rd)) -+ -+#define LU32I_D(_rd, _si20) \ -+ (((22 >> 1) << 25) + (((_si20) & 0xfffff) << 5) + (_rd)) -+ -+#define LU52I_D(_rd, _rj, _si12) \ -+ (((48 >> 2) << 22) + (((_si12) & 0xfff) << 10) + ((_rj) << 5) + (_rd)) ++ * @brief JIRL - Add 16 bit immediate to source register, save to destination ++ * register, jump and link from destination register ++ * ++ * @param[in] _regd source register number (0-31) ++ * @param[in] _regj destination register number (0-31) ++ * @param[in] _imm 16 bit immmediate value ++ */ ++#define JIRL(_regd, _regj, _imm) \ ++ (((0x13) << 26 ) | ((_imm) << 10) | ((_regj) << 5) | (_regd)) ++/** ++ * @brief B - Indirect jump ++ * ++ * @param[in] _imm 26 bit immmediate value ++ */ ++#define B(_imm) \ ++ ((0x14) << 26) | (((_imm) & 0xffff) << 10) | ((_imm) >>16) + -+#define ORI(_rd, _rj, _ui12) \ -+ (((56 >> 2) << 22) + (((_ui12) & 0xfff) << 10) + ((_rj) << 5) + (_rd)) ++/** ++ * @brief PCADDU12I - Add upper intermediate to PC ++ * ++ * @param[in] _regd register number (0-31) ++ * @param[in] _imm 20 bit immmediate value ++ */ ++#define PCADDU12I(_regd, _imm) (((0xe) << 25) | ((_imm) << 5) | (_regd)) ++/** ++ * @brief LD - Load from memory with address from register plus immediate ++ * ++ * @param[in] _regs source register number (0-31) ++ * @param[in] _regd destination register number (0-31) ++ * @param[in] _imm 12 bit immmediate value ++ */ ++#define LD(_regd, _regj, _imm) \ ++ (((0xa3) << 22) | ((_imm) << 10) | ((_regj) << 5) | (_regd)) ++ ++void ucm_bistro_patch_lock(void *dst) ++{ ++ static const ucm_bistro_lock_t self_jmp = { ++ .j = B(0) ++ }; + -+#define JIRL(_rd, _rj, _offs) \ -+ ((19 << 26) + ((_offs) << 10) + ((_rj) << 5) + (_rd)) ++ ucm_bistro_modify_code(dst, &self_jmp); ++} + +ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol, + void **orig_func_p, + ucm_bistro_restore_point_t **rp) +{ -+ ucm_bistro_patch_t patch = { -+ .lu12i = LU12I_W(R20, (uintptr_t)hook >> 12), -+ .ori = ORI(R20, R20, (uintptr_t)hook), -+ .lu32i = LU32I_D(R20, (uintptr_t)hook >> 32), -+ .lu52i = LU52I_D(R20, R20, (uintptr_t)hook >> 52), -+ .jirl = JIRL(0, R20, 0) -+ }; + ucs_status_t status; ++ ucm_bistro_patch_t patch; ++ ++ patch = (ucm_bistro_patch_t) { ++ .pcaddu12i = PCADDU12I(T0, 0), ++ .ld = LD(T2, T0, 0x10), ++ .jirl = JIRL(0, T2, 0), ++ .spare = 0, ++ .address = (uintptr_t)hook ++ }; + + if (orig_func_p != NULL) { + return UCS_ERR_UNSUPPORTED; @@ -248,18 +194,24 @@ index 0000000..0e1fbff + return status; + } + -+ return ucm_bistro_apply_patch(func_ptr, &patch, sizeof(patch)); ++ return ucm_bistro_apply_patch_atomic(func_ptr, &patch, sizeof(patch)); ++} ++ ++ucs_status_t ucm_bistro_relocate_one(ucm_bistro_relocate_context_t *ctx) ++{ ++ return UCS_ERR_UNSUPPORTED; +} + +#endif diff --git a/src/ucm/bistro/bistro_loongarch64.h b/src/ucm/bistro/bistro_loongarch64.h new file mode 100644 -index 0000000..bbc6c0f +index 0000000..a5557b7 --- /dev/null +++ b/src/ucm/bistro/bistro_loongarch64.h -@@ -0,0 +1,46 @@ +@@ -0,0 +1,59 @@ +/** -+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. ++ * Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++ * Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ @@ -268,28 +220,30 @@ index 0000000..bbc6c0f +#ifndef UCM_BISTRO_BISTRO_LOONGARCH64_H_ +#define UCM_BISTRO_BISTRO_LOONGARCH64_H_ + -+#include -+ +#include +#include + ++#include ++#include ++ +#define UCM_BISTRO_PROLOGUE +#define UCM_BISTRO_EPILOGUE + +typedef struct ucm_bistro_patch { -+ uint32_t lu12i; /* lu12i.w r20, addr >> 12 */ -+ uint32_t ori; /* ori r20, r20, addr */ -+ uint32_t lu32i; /* lu32i.d r20, r20, addr >> 32 */ -+ uint32_t lu52i; /* lu52i.d r20, r20, addr >> 52*/ -+ uint32_t jirl; /* jirl 0, r20, 0 */ ++ uint32_t pcaddu12i; ++ uint32_t ld; ++ uint32_t jirl; ++ uint32_t spare; ++ uint64_t address; +} UCS_S_PACKED ucm_bistro_patch_t; + ++ +/** + * Set library function call hook using Binary Instrumentation + * method (BISTRO): replace function body by user defined call + * + * @param func_ptr Pointer to function to patch. -+ * @param hook User-defined function-replacer. ++ * @param hook User-defined replacement function. + * @param symbol Function name to replace. + * @param orig_func_p Unsupported on this architecture and must be NULL. + * If set to a non-NULL value, this function returns @@ -303,300 +257,250 @@ index 0000000..bbc6c0f + void **orig_func_p, + ucm_bistro_restore_point_t **rp); + ++/* Lock implementation */ ++typedef struct { ++ uint32_t j; /* jump to self */ ++} UCS_S_PACKED ucm_bistro_lock_t; ++ ++/** ++ * Helper functions to improve atomicity of function patching ++ */ ++void ucm_bistro_patch_lock(void *dst); ++ +#endif diff --git a/src/ucs/Makefile.am b/src/ucs/Makefile.am -index 7768002..d893f3c 100644 +index 4a05f47..a13a5cb 100644 --- a/src/ucs/Makefile.am +++ b/src/ucs/Makefile.am -@@ -21,6 +21,7 @@ libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS) - - nobase_dist_libucs_la_HEADERS = \ +@@ -24,6 +24,7 @@ nobase_dist_libucs_la_HEADERS = \ arch/aarch64/bitops.h \ -+ arch/loongarch64/bitops.h \ arch/ppc64/bitops.h \ + arch/rv64/bitops.h \ ++ arch/loongarch64/bitops.h \ arch/x86_64/bitops.h \ arch/bitops.h \ -@@ -79,12 +80,14 @@ nobase_dist_libucs_la_HEADERS = \ - arch/x86_64/global_opts.h \ + algorithm/crc.h \ +@@ -85,6 +86,7 @@ nobase_dist_libucs_la_HEADERS = \ arch/x86_64/atomic.h \ arch/aarch64/global_opts.h \ -+ arch/loongarch64/global_opts.h \ arch/generic/atomic.h \ ++ arch/loongarch64/global_opts.h \ arch/ppc64/global_opts.h \ + arch/rv64/global_opts.h \ arch/global_opts.h - +@@ -92,6 +94,7 @@ nobase_dist_libucs_la_HEADERS = \ noinst_HEADERS = \ arch/aarch64/cpu.h \ -+ arch/loongarch64/cpu.h \ arch/generic/cpu.h \ ++ arch/loongarch64/cpu.h \ arch/ppc64/cpu.h \ + arch/rv64/cpu.h \ arch/x86_64/cpu.h \ -@@ -134,6 +137,7 @@ libucs_la_SOURCES = \ +@@ -149,6 +152,8 @@ libucs_la_SOURCES = \ algorithm/string_distance.c \ arch/aarch64/cpu.c \ arch/aarch64/global_opts.c \ ++ arch/loongarch64/cpu.c \ + arch/loongarch64/global_opts.c \ arch/ppc64/timebase.c \ arch/ppc64/global_opts.c \ - arch/x86_64/cpu.c \ -diff --git a/src/ucs/Makefile.in b/src/ucs/Makefile.in -index 34fcd02..54855dd 100644 ---- a/src/ucs/Makefile.in -+++ b/src/ucs/Makefile.in -@@ -202,6 +202,7 @@ am__libucs_la_SOURCES_DIST = algorithm/crc.c algorithm/qsort_r.c \ - algorithm/string_distance.c arch/aarch64/cpu.c \ - arch/aarch64/global_opts.c arch/ppc64/timebase.c \ - arch/ppc64/global_opts.c arch/x86_64/cpu.c \ -+ arch/loongarch64/global_opts.c \ - arch/x86_64/global_opts.c arch/cpu.c async/async.c \ - async/signal.c async/pipe.c async/eventfd.c async/thread.c \ - config/global_opts.c config/ucm_opts.c config/ini.c \ -@@ -229,6 +230,7 @@ am__dirstamp = $(am__leading_dot)dirstamp - algorithm/libucs_la-string_distance.lo \ - arch/aarch64/libucs_la-cpu.lo \ - arch/aarch64/libucs_la-global_opts.lo \ -+ arch/loongarch64/libucs_la-global_opts.lo \ - arch/ppc64/libucs_la-timebase.lo \ - arch/ppc64/libucs_la-global_opts.lo \ - arch/x86_64/libucs_la-cpu.lo \ -@@ -639,6 +641,7 @@ libucs_ladir = $(includedir)/ucs - libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la $(BFD_LIBS) - nobase_dist_libucs_la_HEADERS = \ - arch/aarch64/bitops.h \ -+ arch/loongarch64/bitops.h \ - arch/ppc64/bitops.h \ - arch/x86_64/bitops.h \ - arch/bitops.h \ -@@ -697,12 +700,14 @@ nobase_dist_libucs_la_HEADERS = \ - arch/x86_64/global_opts.h \ - arch/x86_64/atomic.h \ - arch/aarch64/global_opts.h \ -+ arch/loongarch64/global_opts.h \ - arch/generic/atomic.h \ - arch/ppc64/global_opts.h \ - arch/global_opts.h - - noinst_HEADERS = \ - arch/aarch64/cpu.h \ -+ arch/loongarch64/cpu.h \ - arch/generic/cpu.h \ - arch/ppc64/cpu.h \ - arch/x86_64/cpu.h \ -@@ -751,6 +756,7 @@ libucs_la_SOURCES = algorithm/crc.c algorithm/qsort_r.c \ - algorithm/string_distance.c arch/aarch64/cpu.c \ - arch/aarch64/global_opts.c arch/ppc64/timebase.c \ - arch/ppc64/global_opts.c arch/x86_64/cpu.c \ -+ arch/loongarch64/global_opts.c \ - arch/x86_64/global_opts.c arch/cpu.c async/async.c \ - async/signal.c async/pipe.c async/eventfd.c async/thread.c \ - config/global_opts.c config/ucm_opts.c config/ini.c \ -@@ -868,6 +874,16 @@ arch/aarch64/libucs_la-cpu.lo: arch/aarch64/$(am__dirstamp) \ - arch/aarch64/$(DEPDIR)/$(am__dirstamp) - arch/aarch64/libucs_la-global_opts.lo: arch/aarch64/$(am__dirstamp) \ - arch/aarch64/$(DEPDIR)/$(am__dirstamp) -+arch/loongarch64/$(am__dirstamp): -+ @$(MKDIR_P) arch/loongarch64 -+ @: > arch/loongarch64/$(am__dirstamp) -+arch/loongarch64/$(DEPDIR)/$(am__dirstamp): -+ @$(MKDIR_P) arch/loongarch64/$(DEPDIR) -+ @: > arch/loongarch64/$(DEPDIR)/$(am__dirstamp) -+arch/loongarch64/libucs_la-cpu.lo: arch/loongarch64/$(am__dirstamp) \ -+ arch/loongarch64/$(DEPDIR)/$(am__dirstamp) -+arch/loongarch64/libucs_la-global_opts.lo: arch/loongarch64/$(am__dirstamp) \ -+ arch/loongarch64/$(DEPDIR)/$(am__dirstamp) - arch/ppc64/$(am__dirstamp): - @$(MKDIR_P) arch/ppc64 - @: > arch/ppc64/$(am__dirstamp) -@@ -1152,6 +1168,8 @@ mostlyclean-compile: - -rm -f arch/*.lo - -rm -f arch/aarch64/*.$(OBJEXT) - -rm -f arch/aarch64/*.lo -+ -rm -f arch/loongarch64/*.$(OBJEXT) -+ -rm -f arch/loongarch64/*.lo - -rm -f arch/ppc64/*.$(OBJEXT) - -rm -f arch/ppc64/*.lo - -rm -f arch/x86_64/*.$(OBJEXT) -@@ -1190,6 +1208,7 @@ distclean-compile: - @AMDEP_TRUE@@am__include@ @am__quote@arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@arch/aarch64/$(DEPDIR)/libucs_la-memcpy_thunderx2.Plo@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@arch/loongarch64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo@am__quote@ -@@ -1336,6 +1355,13 @@ arch/aarch64/libucs_la-global_opts.lo: arch/aarch64/global_opts.c - @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ - @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/aarch64/libucs_la-global_opts.lo `test -f 'arch/aarch64/global_opts.c' || echo '$(srcdir)/'`arch/aarch64/global_opts.c - -+arch/loongarch64/libucs_la-global_opts.lo: arch/loongarch64/global_opts.c -+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/loongarch64/libucs_la-global_opts.lo -MD -MP -MF arch/loongarch64/$(DEPDIR)/libucs_la-global_opts.Tpo -c -o arch/loongarch64/libucs_la-global_opts.lo `test -f 'arch/loongarch64/global_opts.c' || echo '$(srcdir)/'`arch/loongarch64/global_opts.c -+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/loongarch64/$(DEPDIR)/libucs_la-global_opts.Tpo arch/loongarch64/$(DEPDIR)/libucs_la-global_opts.Plo -+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/loongarch64/global_opts.c' object='arch/loongarch64/libucs_la-global_opts.lo' libtool=yes @AMDEPBACKSLASH@ -+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/loongarch64/libucs_la-global_opts.lo `test -f 'arch/loongarch64/global_opts.c' || echo '$(srcdir)/'`arch/loongarch64/global_opts.c -+ - arch/ppc64/libucs_la-timebase.lo: arch/ppc64/timebase.c - @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/ppc64/libucs_la-timebase.lo -MD -MP -MF arch/ppc64/$(DEPDIR)/libucs_la-timebase.Tpo -c -o arch/ppc64/libucs_la-timebase.lo `test -f 'arch/ppc64/timebase.c' || echo '$(srcdir)/'`arch/ppc64/timebase.c - @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/ppc64/$(DEPDIR)/libucs_la-timebase.Tpo arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo -@@ -1792,6 +1818,7 @@ clean-libtool: - -rm -rf algorithm/.libs algorithm/_libs - -rm -rf arch/.libs arch/_libs - -rm -rf arch/aarch64/.libs arch/aarch64/_libs -+ -rm -rf arch/loongarch64/.libs arch/loongarch64/_libs - -rm -rf arch/ppc64/.libs arch/ppc64/_libs - -rm -rf arch/x86_64/.libs arch/x86_64/_libs - -rm -rf async/.libs async/_libs -@@ -2049,6 +2076,8 @@ distclean-generic: - -rm -f arch/$(am__dirstamp) - -rm -f arch/aarch64/$(DEPDIR)/$(am__dirstamp) - -rm -f arch/aarch64/$(am__dirstamp) -+ -rm -f arch/loongarch64/$(DEPDIR)/$(am__dirstamp) -+ -rm -f arch/loongarch64/$(am__dirstamp) - -rm -f arch/ppc64/$(DEPDIR)/$(am__dirstamp) - -rm -f arch/ppc64/$(am__dirstamp) - -rm -f arch/x86_64/$(DEPDIR)/$(am__dirstamp) -@@ -2087,7 +2116,7 @@ clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ - clean-libtool mostlyclean-am - - distclean: distclean-recursive -- -rm -rf algorithm/$(DEPDIR) arch/$(DEPDIR) arch/aarch64/$(DEPDIR) arch/ppc64/$(DEPDIR) arch/x86_64/$(DEPDIR) async/$(DEPDIR) config/$(DEPDIR) datastruct/$(DEPDIR) debug/$(DEPDIR) memory/$(DEPDIR) profile/$(DEPDIR) stats/$(DEPDIR) sys/$(DEPDIR) sys/topo/base/$(DEPDIR) time/$(DEPDIR) type/$(DEPDIR) vfs/base/$(DEPDIR) -+ -rm -rf algorithm/$(DEPDIR) arch/$(DEPDIR) arch/aarch64/$(DEPDIR) arch/loongarch64/$(DEPDIR) arch/ppc64/$(DEPDIR) arch/x86_64/$(DEPDIR) async/$(DEPDIR) config/$(DEPDIR) datastruct/$(DEPDIR) debug/$(DEPDIR) memory/$(DEPDIR) profile/$(DEPDIR) stats/$(DEPDIR) sys/$(DEPDIR) sys/topo/base/$(DEPDIR) time/$(DEPDIR) type/$(DEPDIR) vfs/base/$(DEPDIR) - -rm -f Makefile - distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags -@@ -2134,7 +2163,7 @@ install-ps-am: - installcheck-am: - - maintainer-clean: maintainer-clean-recursive -- -rm -rf algorithm/$(DEPDIR) arch/$(DEPDIR) arch/aarch64/$(DEPDIR) arch/ppc64/$(DEPDIR) arch/x86_64/$(DEPDIR) async/$(DEPDIR) config/$(DEPDIR) datastruct/$(DEPDIR) debug/$(DEPDIR) memory/$(DEPDIR) profile/$(DEPDIR) stats/$(DEPDIR) sys/$(DEPDIR) sys/topo/base/$(DEPDIR) time/$(DEPDIR) type/$(DEPDIR) vfs/base/$(DEPDIR) -+ -rm -rf algorithm/$(DEPDIR) arch/$(DEPDIR) arch/aarch64/$(DEPDIR) arch/loongarch64/$(DEPDIR) arch/ppc64/$(DEPDIR) arch/x86_64/$(DEPDIR) async/$(DEPDIR) config/$(DEPDIR) datastruct/$(DEPDIR) debug/$(DEPDIR) memory/$(DEPDIR) profile/$(DEPDIR) stats/$(DEPDIR) sys/$(DEPDIR) sys/topo/base/$(DEPDIR) time/$(DEPDIR) type/$(DEPDIR) vfs/base/$(DEPDIR) - -rm -f Makefile - maintainer-clean-am: distclean-am maintainer-clean-generic - + arch/rv64/cpu.c \ diff --git a/src/ucs/arch/atomic.h b/src/ucs/arch/atomic.h -index 6a8551f..2fad8d4 100644 +index 8496479..33a5e74 100644 --- a/src/ucs/arch/atomic.h +++ b/src/ucs/arch/atomic.h -@@ -15,6 +15,8 @@ +@@ -18,6 +18,8 @@ # include "generic/atomic.h" - #elif defined(__aarch64__) + #elif defined(__riscv) # include "generic/atomic.h" -+#elif defined(__loongarch__) ++#elif defined(__loongarch64) +# include "generic/atomic.h" #else # error "Unsupported architecture" #endif diff --git a/src/ucs/arch/bitops.h b/src/ucs/arch/bitops.h -index 77e0057..09e68e6 100644 +index 3e0e530..d8c58ae 100644 --- a/src/ucs/arch/bitops.h +++ b/src/ucs/arch/bitops.h -@@ -20,6 +20,8 @@ BEGIN_C_DECLS - # include "ppc64/bitops.h" - #elif defined(__aarch64__) +@@ -23,6 +23,8 @@ BEGIN_C_DECLS # include "aarch64/bitops.h" -+#elif defined(__loongarch__) + #elif defined(__riscv) + # include "rv64/bitops.h" ++#elif defined(__loongarch64) +# include "loongarch64/bitops.h" #else # error "Unsupported architecture" #endif +diff --git a/src/ucs/arch/cpu.c b/src/ucs/arch/cpu.c +index 307fb61..a50b993 100644 +--- a/src/ucs/arch/cpu.c ++++ b/src/ucs/arch/cpu.c +@@ -60,6 +60,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF + }, ++ [UCS_CPU_VENDOR_GENERIC_LOONGARCH64] = { ++ .min = UCS_MEMUNITS_INF, ++ .max = UCS_MEMUNITS_INF ++ }, + [UCS_CPU_VENDOR_GENERIC_PPC] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF +@@ -181,6 +185,7 @@ const char *ucs_cpu_vendor_name() + [UCS_CPU_VENDOR_INTEL] = "Intel", + [UCS_CPU_VENDOR_AMD] = "AMD", + [UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM", ++ [UCS_CPU_VENDOR_GENERIC_LOONGARCH64] = "Generic LoongArch64", + [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC", + [UCS_CPU_VENDOR_GENERIC_RV64G] = "Generic RV64G", + [UCS_CPU_VENDOR_FUJITSU_ARM] = "Fujitsu ARM", +@@ -206,6 +211,7 @@ const char *ucs_cpu_model_name() + [UCS_CPU_MODEL_ARM_AARCH64] = "ARM 64-bit", + [UCS_CPU_MODEL_AMD_NAPLES] = "Naples", + [UCS_CPU_MODEL_AMD_ROME] = "Rome", ++ [UCS_CPU_MODEL_LOONGARCH64] = "LoongArch 64-bit", + [UCS_CPU_MODEL_AMD_MILAN] = "Milan", + [UCS_CPU_MODEL_AMD_GENOA] = "Genoa", + [UCS_CPU_MODEL_ZHAOXIN_ZHANGJIANG] = "Zhangjiang", diff --git a/src/ucs/arch/cpu.h b/src/ucs/arch/cpu.h -index 719913f..cf6e4de 100644 +index ca25e71..14d2f1e 100644 --- a/src/ucs/arch/cpu.h +++ b/src/ucs/arch/cpu.h -@@ -98,6 +98,8 @@ typedef struct ucs_cpu_builtin_memcpy { - # include "ppc64/cpu.h" - #elif defined(__aarch64__) +@@ -35,6 +35,7 @@ typedef enum ucs_cpu_model { + UCS_CPU_MODEL_AMD_ROME, + UCS_CPU_MODEL_AMD_MILAN, + UCS_CPU_MODEL_AMD_GENOA, ++ UCS_CPU_MODEL_LOONGARCH64, + UCS_CPU_MODEL_ZHAOXIN_ZHANGJIANG, + UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU, + UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI, +@@ -67,6 +68,7 @@ typedef enum ucs_cpu_vendor { + UCS_CPU_VENDOR_INTEL, + UCS_CPU_VENDOR_AMD, + UCS_CPU_VENDOR_GENERIC_ARM, ++ UCS_CPU_VENDOR_GENERIC_LOONGARCH64, + UCS_CPU_VENDOR_GENERIC_PPC, + UCS_CPU_VENDOR_FUJITSU_ARM, + UCS_CPU_VENDOR_ZHAOXIN, +@@ -107,6 +109,8 @@ typedef struct ucs_cpu_builtin_memcpy { # include "aarch64/cpu.h" -+#elif defined(__loongarch__) + #elif defined(__riscv) + # include "rv64/cpu.h" ++#elif defined(__loongarch64) +# include "loongarch64/cpu.h" #else # error "Unsupported architecture" #endif diff --git a/src/ucs/arch/global_opts.h b/src/ucs/arch/global_opts.h -index 8786f13..28753ef 100644 +index 550d22b..9cdb517 100644 --- a/src/ucs/arch/global_opts.h +++ b/src/ucs/arch/global_opts.h -@@ -15,6 +15,8 @@ - # include "ppc64/global_opts.h" - #elif defined(__aarch64__) +@@ -18,6 +18,8 @@ # include "aarch64/global_opts.h" -+#elif defined(__loongarch__) + #elif defined(__riscv) + # include "rv64/global_opts.h" ++#elif defined(__loongarch64) +# include "loongarch64/global_opts.h" #else # error "Unsupported architecture" #endif diff --git a/src/ucs/arch/loongarch64/bitops.h b/src/ucs/arch/loongarch64/bitops.h new file mode 100644 -index 0000000..86ad7f1 +index 0000000..2d0491c --- /dev/null +++ b/src/ucs/arch/loongarch64/bitops.h -@@ -0,0 +1,42 @@ +@@ -0,0 +1,34 @@ +/** -+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. ++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++* Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + -+#ifndef UCS_LOONGARCH64_BITOPS_H_ -+#define UCS_LOONGARCH64_BITOPS_H_ ++#ifndef UCS_ARCH_LOONGARCH64_BITOPS_H_ ++#define UCS_ARCH_LOONGARCH64_BITOPS_H_ + +#include +#include + -+ +static UCS_F_ALWAYS_INLINE unsigned __ucs_ilog2_u32(uint32_t n) +{ -+ int bit; -+ asm ("clz.w %0,%1" : "=r" (bit) : "r" (n)); -+ return 31 - bit; ++ return 31 - __builtin_clz(n); +} + +static UCS_F_ALWAYS_INLINE unsigned __ucs_ilog2_u64(uint64_t n) +{ -+ int bit; -+ asm ("clz.d %0,%1" : "=r" (bit) : "r" (n)); -+ return 63 - bit; ++ return 63 - __builtin_clzll(n); +} + +static UCS_F_ALWAYS_INLINE unsigned ucs_ffs32(uint32_t n) +{ -+ int bit; -+ asm ("ctz.w %0,%1" : "=r" (bit) : "r" (n)); -+ return 31 - bit; ++ return __ucs_ilog2_u32(n & -n); +} + +static UCS_F_ALWAYS_INLINE unsigned ucs_ffs64(uint64_t n) +{ -+ int bit; -+ asm ("ctz.d %0,%1" : "=r" (bit) : "r" (n)); -+ return 63 - bit; ++ return __ucs_ilog2_u64(n & -n); ++} ++ ++#endif +diff --git a/src/ucs/arch/loongarch64/cpu.c b/src/ucs/arch/loongarch64/cpu.c +new file mode 100644 +index 0000000..a9d1c06 +--- /dev/null ++++ b/src/ucs/arch/loongarch64/cpu.c +@@ -0,0 +1,21 @@ ++/** ++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++* Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. ++* ++* See file LICENSE for terms. ++*/ ++ ++#if defined(__loongarch64) ++ ++#ifdef HAVE_CONFIG_H ++# include "config.h" ++#endif ++ ++#include ++ ++ucs_cpu_vendor_t ucs_arch_get_cpu_vendor() ++{ ++ return UCS_CPU_VENDOR_GENERIC_LOONGARCH64; +} + +#endif diff --git a/src/ucs/arch/loongarch64/cpu.h b/src/ucs/arch/loongarch64/cpu.h new file mode 100644 -index 0000000..4f3223e +index 0000000..b2380f6 --- /dev/null +++ b/src/ucs/arch/loongarch64/cpu.h -@@ -0,0 +1,101 @@ +@@ -0,0 +1,115 @@ +/** -+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. -+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. ++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++* Copyright (C) Rivos Inc. 2023 ++* Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + ++#ifndef UCS_ARCH_LOONGARCH64_CPU_H_ ++#define UCS_ARCH_LOONGARCH64_CPU_H_ + -+#ifndef UCS_LOONGARCH64_CPU_H_ -+#define UCS_LOONGARCH64_CPU_H_ -+ ++#include ++#include ++#include +#include -+#include +#include -+#include ++ ++#include ++#include +#include +#include -+#include ++#include ++#include ++#include + +BEGIN_C_DECLS + @@ -604,43 +508,37 @@ index 0000000..4f3223e + +#define UCS_ARCH_CACHE_LINE_SIZE 64 + -+/* Assume the worst - weak memory ordering */ -+#define ucs_memory_bus_fence() asm volatile ("dbar 0"::: "memory") -+#define ucs_memory_bus_store_fence() ucs_memory_bus_fence() -+#define ucs_memory_bus_load_fence() ucs_memory_bus_fence() -+#define ucs_memory_bus_cacheline_wc_flush() -+#define ucs_memory_cpu_fence() ucs_memory_bus_fence() -+#define ucs_memory_cpu_store_fence() asm volatile ("dbar 0 \n" \ -+ ::: "memory") -+#define ucs_memory_cpu_load_fence() asm volatile ("dbar 0 \n" \ -+ "dbar 0 \n" \ -+ ::: "memory") -+#define ucs_memory_cpu_wc_fence() ucs_memory_bus_fence() ++#define ucs_loongarch64_dbar(hint) asm volatile ("dbar %0 " : : "I"(hint) : "memory") + ++#define crwrw 0b00000 ++#define cr_r_ 0b00101 ++#define c_w_w 0b01010 + -+static inline uint64_t ucs_arch_read_hres_clock() -+{ -+ uint64_t tb; -+ __drdtime_t tsc; -+ tsc = __rdtime_d(); -+ tb = tsc.dvalue; -+ return tb; -+} ++#define orwrw 0b10000 ++#define or_r_ 0b10101 ++#define o_w_w 0b11010 + -+static inline double ucs_arch_get_clocks_per_sec() { -+ uint64_t freq; -+ freq = 100 * 1000 * 1000; -+ return (double) freq; -+} ++#define orw_w 0b10010 ++#define or_rw 0b10100 + -+static inline ucs_cpu_model_t ucs_arch_get_cpu_model() ++#define ucs_memory_bus_store_fence() ucs_loongarch64_dbar(c_w_w) ++#define ucs_memory_bus_load_fence() ucs_loongarch64_dbar(cr_r_) ++ ++ ++#define ucs_memory_cpu_fence() ucs_loongarch64_dbar(orwrw) ++#define ucs_memory_bus_cacheline_wc_flush() ucs_memory_cpu_fence() ++#define ucs_memory_cpu_store_fence() ucs_loongarch64_dbar(o_w_w) ++#define ucs_memory_cpu_load_fence() ucs_loongarch64_dbar(or_r_) ++#define ucs_memory_cpu_wc_fence() ucs_memory_cpu_fence() ++ ++static inline double ucs_arch_get_clocks_per_sec() +{ -+ return UCS_CPU_MODEL_UNKNOWN; ++ return ucs_arch_generic_get_clocks_per_sec(); +} + -+static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor() ++static inline ucs_cpu_model_t ucs_arch_get_cpu_model() +{ -+ return UCS_CPU_VENDOR_UNKNOWN; ++ return UCS_CPU_MODEL_LOONGARCH64; +} + +static inline int ucs_arch_get_cpu_flag() @@ -652,16 +550,35 @@ index 0000000..4f3223e +{ +} + ++ucs_cpu_vendor_t ucs_arch_get_cpu_vendor(); ++ ++static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes) ++{ ++ return UCS_ERR_UNSUPPORTED; ++} ++ ++static inline uint64_t ucs_arch_read_hres_clock() ++{ ++ uint64_t cnt_id, time; ++ __asm__ __volatile__ ( ++ "rdtime.d %0, %1\n\t" ++ :"=&r"(time), "=&r"(cnt_id) ++ ); ++ return time; ++} ++ +#define ucs_arch_wait_mem ucs_arch_generic_wait_mem + +#if !HAVE___CLEAR_CACHE +static inline void ucs_arch_clear_cache(void *start, void *end) +{ -+ ucs_memory_cpu_fence(); ++ usc_memory_cpu_fence(); +} +#endif + -+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len) ++static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len, ++ ucs_arch_memcpy_hint_t hint, ++ size_t total_len) +{ + return memcpy(dst, src, len); +} @@ -672,27 +589,22 @@ index 0000000..4f3223e + memcpy(dst, src, len); +} + -+static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes) -+{ -+ return UCS_ERR_UNSUPPORTED; -+} -+ +END_C_DECLS + +#endif diff --git a/src/ucs/arch/loongarch64/global_opts.c b/src/ucs/arch/loongarch64/global_opts.c new file mode 100644 -index 0000000..17f7a2a +index 0000000..45b9b2a --- /dev/null +++ b/src/ucs/arch/loongarch64/global_opts.c @@ -0,0 +1,24 @@ +/** -+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. ++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++* Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ -+ -+#if defined(__loongarch__) ++#if defined(__loongarch64) + +#ifdef HAVE_CONFIG_H +# include "config.h" @@ -702,7 +614,7 @@ index 0000000..17f7a2a +#include + +ucs_config_field_t ucs_arch_global_opts_table[] = { -+ {NULL} ++ {NULL} +}; + +void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config) @@ -712,20 +624,22 @@ index 0000000..17f7a2a +#endif diff --git a/src/ucs/arch/loongarch64/global_opts.h b/src/ucs/arch/loongarch64/global_opts.h new file mode 100644 -index 0000000..b43786c +index 0000000..d22c8b1 --- /dev/null +++ b/src/ucs/arch/loongarch64/global_opts.h -@@ -0,0 +1,24 @@ +@@ -0,0 +1,26 @@ +/** -+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. ++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++* Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + -+ +#ifndef UCS_LOONGARCH64_GLOBAL_OPTS_H_ +#define UCS_LOONGARCH64_GLOBAL_OPTS_H_ + ++#include ++ +#include + +BEGIN_C_DECLS @@ -740,6 +654,27 @@ index 0000000..b43786c +END_C_DECLS + +#endif +diff --git a/src/ucs/configure.m4 b/src/ucs/configure.m4 +index 814f44d..65594c8 100644 +--- a/src/ucs/configure.m4 ++++ b/src/ucs/configure.m4 +@@ -3,6 +3,7 @@ + # Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. + # Copyright (C) ARM, Ltd. 2016. ALL RIGHTS RESERVED. + # Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. ++# Copyright (C) Dandan Zhang, 2024. ALL RIGHTS RESERVED. + # See file LICENSE for terms. + # + +@@ -239,7 +240,7 @@ AC_ARG_WITH([cache-line-size], + [AS_HELP_STRING([--with-cache-line-size=SIZE], + [Build UCX with cache line size defined by user. This parameter + overwrites default cache line sizes defines in +- UCX (x86-64: 64, Power: 128, ARMv8: 64/128, RISCV: 64). The supported values are: 64, 128])], ++ UCX (x86-64: 64, Power: 128, ARMv8: 64/128, RISCV: 64, LoongArch: 64). The supported values are: 64, 128])], + [], + [with_cache_line_size=no]) + -- -2.40.1 +2.41.0 diff --git a/0001-fix-build-error-for-ucx.patch b/0001-fix-build-error-for-ucx.patch index bf27090d43034d53626ea35120e5fcbfe47a309d..03942348994db23f1b3c563323db832fd3fbfbc3 100644 --- a/0001-fix-build-error-for-ucx.patch +++ b/0001-fix-build-error-for-ucx.patch @@ -1,26 +1,26 @@ -From 63a1803b8661d5e3ca6115410df3669b8be20f9b Mon Sep 17 00:00:00 2001 -From: Wenlong Zhang -Date: Wed, 20 Dec 2023 02:39:35 +0000 -Subject: [PATCH] fix build error for ucx +From 104aa9d4345bb3f0c7c8f12fd2db2aed468d8367 Mon Sep 17 00:00:00 2001 +From: Your Name +Date: Fri, 21 Mar 2025 03:03:34 +0000 +Subject: [PATCH] 01 fix build for loongarch64 --- - src/ucm/bistro/bistro_loongarch64.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) + src/ucs/arch/loongarch64/cpu.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) -diff --git a/src/ucm/bistro/bistro_loongarch64.c b/src/ucm/bistro/bistro_loongarch64.c -index 0e1fbff..b72e8d5 100644 ---- a/src/ucm/bistro/bistro_loongarch64.c -+++ b/src/ucm/bistro/bistro_loongarch64.c -@@ -77,5 +77,8 @@ ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol, - - return ucm_bistro_apply_patch(func_ptr, &patch, sizeof(patch)); +diff --git a/src/ucs/arch/loongarch64/cpu.h b/src/ucs/arch/loongarch64/cpu.h +index b2380f6..7942778 100644 +--- a/src/ucs/arch/loongarch64/cpu.h ++++ b/src/ucs/arch/loongarch64/cpu.h +@@ -97,9 +97,7 @@ static inline void ucs_arch_clear_cache(void *start, void *end) } -- -+ucs_status_t ucm_bistro_relocate_one(ucm_bistro_relocate_context_t *ctx) -+{ -+ return UCS_ERR_UNSUPPORTED; -+} #endif + +-static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len, +- ucs_arch_memcpy_hint_t hint, +- size_t total_len) ++static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len) + { + return memcpy(dst, src, len); + } -- 2.41.0 - diff --git a/UCS-TIME-Fix-undeclared-INFINITY-error-in-ucs_time_u.patch b/UCS-TIME-Fix-undeclared-INFINITY-error-in-ucs_time_u.patch new file mode 100644 index 0000000000000000000000000000000000000000..f6dd8de8af1a27206cbeaee673a1ad7fda30608c --- /dev/null +++ b/UCS-TIME-Fix-undeclared-INFINITY-error-in-ucs_time_u.patch @@ -0,0 +1,29 @@ +From b0a275a5492125a13020cd095fe9934e0b5e7c6a Mon Sep 17 00:00:00 2001 +From: Aboorva Devarajan +Date: Wed, 19 Jun 2024 13:47:14 -0400 +Subject: [PATCH] UCS/TIME: Fix undeclared INFINITY error in + ucs_time_units_to_sec() + +Included in ucs_time_units_to_sec() to resolve compilation +errors due to missing INFINITY constant. + +Signed-off-by: Aboorva Devarajan +--- + src/ucs/time/time.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/ucs/time/time.h b/src/ucs/time/time.h +index cff9810cdad8..c51362273f8d 100644 +--- a/src/ucs/time/time.h ++++ b/src/ucs/time/time.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + BEGIN_C_DECLS + +-- +2.47.0 + diff --git a/autogen.sh b/autogen.sh new file mode 100644 index 0000000000000000000000000000000000000000..d944671266329e27c03cfd613baf554e9658a33f --- /dev/null +++ b/autogen.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +usage() +{ + echo "Usage: autogen.sh " + echo + echo " -h|--help Show this help message" + echo " --with-ucg Fetch UCG submodule" + echo +} + +with_ucg="no" + +for key in "$@" +do + case $key in + -h|--help) + usage + exit 0 + ;; + --with-ucg) + with_ucg="yes" + ;; + *) + usage + exit -2 + ;; + esac +done + +rm -rf autom4te.cache +mkdir -p config/m4 config/aux + +if [ "X$with_ucg" = "Xyes" ] +then + git submodule update --init --recursive --remote +fi + +autoreconf -v --install || exit 1 +rm -rf autom4te.cache + diff --git a/ucx-1.15.0.tar.gz b/ucx-1.17.0.tar.gz similarity index 30% rename from ucx-1.15.0.tar.gz rename to ucx-1.17.0.tar.gz index d0b9adcfd34738ec33a8bd940d49cdbec2055417..cec05d70072c385d5f6005f4c8dc17c442d360c7 100644 Binary files a/ucx-1.15.0.tar.gz and b/ucx-1.17.0.tar.gz differ diff --git a/ucx.spec b/ucx.spec index c7ec0bd003cd0d7693f2232681ae1c52bb02af6c..0b9678f532d9e686ee78238d73d8bc011ef6a0d1 100644 --- a/ucx.spec +++ b/ucx.spec @@ -1,4 +1,4 @@ -%define anolis_release 2 +%define anolis_release 1 %{!?configure_options: %global configure_options %{nil}} %bcond_without cma @@ -13,7 +13,7 @@ %bcond_with vfs Name: ucx -Version: 1.15.0 +Version: 1.17.0 Release: %{anolis_release}%{?dist} Summary: UCX is a communication library implementing high-performance messaging @@ -30,8 +30,12 @@ License: BSD-3-Clause AND MIT AND CC-PDDC AND (BSD-3-Clause OR Apache-2.0) URL: http://www.openucx.org Source: https://github.com/openucx/%{name}/releases/download/v%{version}/ucx-%{version}.tar.gz +#https://github.com/openucx/ucx/blob/master/autogen.sh +Source1: autogen.sh Patch001: 0001-add-loongarch64-support-for-ucx.patch Patch002: 0001-fix-build-error-for-ucx.patch +# https://github.com/openucx/ucx/commit/b0a275a5492125a13020cd095fe9934e0b5e7c6a +Patch003: UCS-TIME-Fix-undeclared-INFINITY-error-in-ucs_time_u.patch %if %{defined extra_deps} Requires: %{?extra_deps} @@ -85,18 +89,16 @@ Provides header files and examples for developing with UCX. %setup -q %patch001 -p1 %patch002 -p1 +%patch003 -p1 %build -# Remove these fixes when upgrading to version 1.14 -# Fix for improper declaration, upstream fix: -# https://github.com/openucx/ucx/commit/8d6032ec864190c9f079d96e731c5004a975e153 -sed -i 's/unsigned advice)/uct_mem_advice_t advice)/g' src/uct/base/uct_md.c # Enable use of GCC 13, upstream fix: # https://github.com/openucx/ucx/commit/8f70e898b43d1bde1ff3fae56bf0ac5aac285997 sed -i '/#include / a #include ' test/apps/sockaddr/sa_util.h %define _with_arg() %{expand:%%{?with_%{1}:--with-%{2}}%%{!?with_%{1}:--without-%{2}}} %define _enable_arg() %{expand:%%{?with_%{1}:--enable-%{2}}%%{!?with_%{1}:--disable-%{2}}} +bash %{SOURCE1} %configure --disable-optimizations \ --disable-logging \ --disable-debug \ @@ -129,12 +131,14 @@ rm -f %{buildroot}%{_libdir}/ucx/lib*.so %{_libdir}/lib*.so.* %{_bindir}/ucx_info %{_bindir}/ucx_perftest +%{_bindir}/ucx_perftest_daemon %{_bindir}/ucx_read_profile %{_bindir}/io_demo %{_datadir}/ucx %exclude %{_datadir}/ucx/examples %doc README AUTHORS NEWS %license LICENSE +%{_sysconfdir}/ucx/ucx.conf %files devel %{_includedir}/uc* @@ -310,6 +314,11 @@ status, and more. %endif %changelog +* Wed Feb 26 2025 mgb01105731 - 1.17.0-1 +- Update to 1.17.0 from 1.15.0 +- Add patch to fix undeclared INFINITY error in ucs_time_units_to_sec() +- Update loongarch64 patches for new version () + * Wed Dec 20 2023 Wenlong Zhang - 1.15.0-2 - fix build error for ucx