From 09b75bfd23940e7ba43bdcbd5fdc8c80dcc05908 Mon Sep 17 00:00:00 2001 From: "Hu, Lin1" Date: Tue, 30 Sep 2025 14:13:33 +0800 Subject: [PATCH] x86: Backport patches from releases/gcc-14 Some fixes for Clearwater Forest and x86 arch. r14-11861-g11b03928bab9a5 i386: Remove CLDEMOTE for clients r14-11889-g8ea12b87ec867a i386: Remove KEYLOCKER related feature since Panther Lake and Clearwater Forest r14-11895-g4d929cd27e66c7 x86-64: Add RDI clobber to tls_global_dynamic_64 patterns r14-11896-g59db34888d289d x86-64: Add RDI clobber to 64-bit dynamic TLS patterns r14-11918-gb7e9ac6549ec5c x86: Enable *mov_(and|or) only for -Oz r14-11923-gc8eb4fcd40c2fa Eliminate redundant vpextrq/vpinsrq when move TI to V4SI. r14-11924-g8b959ca27ee95b x86: Transform to "pushq $-1; popq reg" for -Oz r14-11953-g5a11b0fa070298 x86: Disallow -mtls-dialect=gnu with no_caller_saved_registers r14-11954-g32960f98e639b4 x86: Pass -mno-80387 to compile pr121208-1(a|b).c --- 0024-i386-Remove-CLDEMOTE-for-clients.patch | 102 +++++++ ...OCKER-related-feature-since-Panther-.patch | 73 +++++ ...lobber-to-tls_global_dynamic_64-patt.patch | 76 +++++ ...lobber-to-64-bit-dynamic-TLS-pattern.patch | 143 +++++++++ ...Enable-mov-mode-_-and-or-only-for-Oz.patch | 273 ++++++++++++++++++ ...ant-vpextrq-vpinsrq-when-move-TI-to-.patch | 95 ++++++ ...Transform-to-pushq-1-popq-reg-for-Oz.patch | 67 +++++ ...s-dialect-gnu-with-no_caller_saved_r.patch | 179 ++++++++++++ ...o-80387-to-compile-pr121208-1-a-b-.c.patch | 54 ++++ gcc.spec | 24 +- 10 files changed, 1085 insertions(+), 1 deletion(-) create mode 100644 0024-i386-Remove-CLDEMOTE-for-clients.patch create mode 100644 0025-i386-Remove-KEYLOCKER-related-feature-since-Panther-.patch create mode 100644 0026-x86-64-Add-RDI-clobber-to-tls_global_dynamic_64-patt.patch create mode 100644 0027-x86-64-Add-RDI-clobber-to-64-bit-dynamic-TLS-pattern.patch create mode 100644 0028-x86-Enable-mov-mode-_-and-or-only-for-Oz.patch create mode 100644 
0029-Eliminate-redundant-vpextrq-vpinsrq-when-move-TI-to-.patch create mode 100644 0030-x86-Transform-to-pushq-1-popq-reg-for-Oz.patch create mode 100644 0031-x86-Disallow-mtls-dialect-gnu-with-no_caller_saved_r.patch create mode 100644 0032-x86-Pass-mno-80387-to-compile-pr121208-1-a-b-.c.patch diff --git a/0024-i386-Remove-CLDEMOTE-for-clients.patch b/0024-i386-Remove-CLDEMOTE-for-clients.patch new file mode 100644 index 0000000..4f343ae --- /dev/null +++ b/0024-i386-Remove-CLDEMOTE-for-clients.patch @@ -0,0 +1,102 @@ +From 11b03928bab9a52e4ec43a3d5a0ab85e5a8ee67a Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Tue, 17 Jun 2025 14:08:38 +0800 +Subject: [PATCH] i386: Remove CLDEMOTE for clients + +CLDEMOTE is not enabled on clients according to SDM. SDM only mentioned +it will be enabled on Xeon and Atom servers, not clients. Remove them +since Alder Lake (where it is introduced). + +gcc/ChangeLog: + + * config/i386/i386.h (PTA_ALDERLAKE): Use PTA_GOLDMONT_PLUS + as base to remove PTA_CLDEMOTE. + (PTA_SIERRAFOREST): Add PTA_CLDEMOTE since PTA_ALDERLAKE + does not include that anymore. + * doc/invoke.texi: Update texi file. 
+--- + gcc/config/i386/i386.h | 8 +++++--- + gcc/doc/invoke.texi | 29 ++++++++++++++--------------- + 2 files changed, 19 insertions(+), 18 deletions(-) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 2fc82b175e6..6a833fd8dbd 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2415,12 +2415,14 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID + | PTA_SGX | PTA_PTWRITE; + constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; +-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX ++constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB ++ | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX + | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT + | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE + | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; +-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA +- | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; ++constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE ++ | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD ++ | PTA_ENQCMD | PTA_UINTR; + constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 + | PTA_PREFETCHI; + constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 64728fead51..d8ff23447f4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -34514,37 +34514,36 @@ VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. 
+ Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, + MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, + PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, +-GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, +-BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, +-WIDEKL and AVX-VNNI instruction set support. ++GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, ++FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and ++AVX-VNNI instruction set support. + + @item arrowlake + Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, + XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set +-support. ++MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, ++VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, ++AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set support. + + @item arrowlake-s + @itemx lunarlake + Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, + SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, + XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, +-MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, +-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, +-SM3 and SM4 instruction set support. 
++MOVDIRI, MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, ++PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, ++AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3 and ++SM4 instruction set support. + + @item pantherlake + Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, + XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, +-MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, +-PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, +-SM3, SM4 and PREFETCHI instruction set support. ++MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, ++VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, ++AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3, SM4 and ++PREFETCHI instruction set support. + + @item sapphirerapids + @itemx emeraldrapids +-- +2.31.1 + diff --git a/0025-i386-Remove-KEYLOCKER-related-feature-since-Panther-.patch b/0025-i386-Remove-KEYLOCKER-related-feature-since-Panther-.patch new file mode 100644 index 0000000..885e9c5 --- /dev/null +++ b/0025-i386-Remove-KEYLOCKER-related-feature-since-Panther-.patch @@ -0,0 +1,73 @@ +From 8ea12b87ec867ac07ffc0449a5cb5d8108a88527 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Fri, 11 Jul 2025 16:16:52 +0800 +Subject: [PATCH] i386: Remove KEYLOCKER related feature since Panther Lake and + Clearwater Forest + +According to July 2025 SDM, Key locker will no longer be supported on +hardware 2025 onwards. This means for Panther Lake and Clearwater Forest, +the feature will not be enabled. Remove them from those two platforms. + +gcc/ChangeLog: + + * config/i386/i386.h (PTA_PANTHERLAKE): Remove KL and WIDEKL. + (PTA_CLEARWATERFOREST): Ditto. 
+ * doc/invoke.texi: Revise documentation. +--- + gcc/config/i386/i386.h | 9 +++++---- + gcc/doc/invoke.texi | 12 ++++++------ + 2 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 6a833fd8dbd..2aed6a2b773 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2432,10 +2432,11 @@ constexpr wide_int_bitmask PTA_ARROWLAKE = PTA_ALDERLAKE | PTA_AVXIFMA + | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_UINTR; + constexpr wide_int_bitmask PTA_ARROWLAKE_S = PTA_ARROWLAKE | PTA_AVXVNNIINT16 + | PTA_SHA512 | PTA_SM3 | PTA_SM4; +-constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST +- | PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR +- | PTA_PREFETCHI; +-constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI; ++constexpr wide_int_bitmask PTA_CLEARWATERFOREST = ++ (PTA_SIERRAFOREST & (~(PTA_KL | PTA_WIDEKL))) | PTA_AVXVNNIINT16 | PTA_SHA512 ++ | PTA_SM3 | PTA_SM4 | PTA_USER_MSR | PTA_PREFETCHI; ++constexpr wide_int_bitmask PTA_PANTHERLAKE = ++ (PTA_ARROWLAKE_S & (~(PTA_KL | PTA_WIDEKL))) | PTA_PREFETCHI; + constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW + | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index d8ff23447f4..6e68691a37d 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -34541,9 +34541,9 @@ Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, + XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, + MOVDIR64B, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +-VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, UINTR, AVXIFMA, +-AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3, SM4 and +-PREFETCHI instruction set support. 
++VAES, VPCLMULQDQ, SERIALIZE, HRESET, AVX-VNNI, UINTR, AVXIFMA, AVXVNNIINT8, ++AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, SM3, SM4 and PREFETCHI ++instruction set support. + + @item sapphirerapids + @itemx emeraldrapids +@@ -34632,9 +34632,9 @@ Intel Clearwater Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, + SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, + XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, + MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, +-LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, +-ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, +-SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support. ++LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, AVX-VNNI, ENQCMD, ++UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, ++SM3, SM4, USER_MSR and PREFETCHI instruction set support. + + @item knl + Intel Knights Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +-- +2.31.1 + diff --git a/0026-x86-64-Add-RDI-clobber-to-tls_global_dynamic_64-patt.patch b/0026-x86-64-Add-RDI-clobber-to-tls_global_dynamic_64-patt.patch new file mode 100644 index 0000000..50ca54f --- /dev/null +++ b/0026-x86-64-Add-RDI-clobber-to-tls_global_dynamic_64-patt.patch @@ -0,0 +1,76 @@ +From 4d929cd27e66c7d9c519cbcd77f12e1d58e85689 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Tue, 1 Jul 2025 17:17:06 +0800 +Subject: [PATCH] x86-64: Add RDI clobber to tls_global_dynamic_64 patterns + +*tls_global_dynamic_64_ uses RDI as the __tls_get_addr argument. +Add RDI clobber to tls_global_dynamic_64 patterns to show it. + + PR target/120908 + * config/i386/i386.cc (legitimize_tls_address): Pass RDI to + gen_tls_global_dynamic_64. + * config/i386/i386.md (*tls_global_dynamic_64_): Add RDI + clobber and use it to generate LEA. + (@tls_global_dynamic_64_): Add a clobber. 
+ +Signed-off-by: H.J. Lu +(cherry picked from commit 7710d513a552f1fa1b7485ec6b318bafaa6d4cd7) +--- + gcc/config/i386/i386.cc | 3 ++- + gcc/config/i386/i386.md | 8 +++++--- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 2e1a31d0540..94e29fa4321 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -12218,11 +12218,12 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, AX_REG); ++ rtx rdi = gen_rtx_REG (Pmode, DI_REG); + rtx_insn *insns; + + start_sequence (); + emit_call_insn +- (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); ++ (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi)); + insns = get_insns (); + end_sequence (); + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index 78d75138721..def39d1a729 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -21114,7 +21114,8 @@ (define_insn "*tls_global_dynamic_64_" + (match_operand 3))) + (unspec:P [(match_operand 1 "tls_symbolic_operand") + (reg:P SP_REG)] +- UNSPEC_TLS_GD)] ++ UNSPEC_TLS_GD) ++ (clobber (match_operand:P 4 "register_operand" "=D"))] + "TARGET_64BIT" + { + if (!TARGET_X32) +@@ -21131,7 +21132,7 @@ (define_insn "*tls_global_dynamic_64_" + Use data16 prefix instead, which doesn't have this problem. 
*/ + fputs ("\tdata16", asm_out_file); + output_asm_insn +- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); ++ ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands); + if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) + fputs (ASM_SHORT "0x6666\n", asm_out_file); + else +@@ -21178,7 +21179,8 @@ (define_expand "@tls_global_dynamic_64_" + (const_int 0))) + (unspec:P [(match_operand 1 "tls_symbolic_operand") + (reg:P SP_REG)] +- UNSPEC_TLS_GD)])] ++ UNSPEC_TLS_GD) ++ (clobber (match_operand:P 3 "register_operand"))])] + "TARGET_64BIT" + "ix86_tls_descriptor_calls_expanded_in_cfun = true;") + +-- +2.31.1 + diff --git a/0027-x86-64-Add-RDI-clobber-to-64-bit-dynamic-TLS-pattern.patch b/0027-x86-64-Add-RDI-clobber-to-64-bit-dynamic-TLS-pattern.patch new file mode 100644 index 0000000..4b4f20b --- /dev/null +++ b/0027-x86-64-Add-RDI-clobber-to-64-bit-dynamic-TLS-pattern.patch @@ -0,0 +1,143 @@ +From 59db34888d289dae8fae2bcbf5e435be53da0edc Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Thu, 3 Jul 2025 10:54:39 +0800 +Subject: [PATCH] x86-64: Add RDI clobber to 64-bit dynamic TLS patterns + +*tls_global_dynamic_64_largepic, *tls_local_dynamic_64_ and +*tls_local_dynamic_base_64_largepic use RDI as the __tls_get_addr +argument. Add RDI clobber to these patterns to show it. + +gcc/ + + PR target/120908 + * config/i386/i386.cc (legitimize_tls_address): Pass RDI to + gen_tls_local_dynamic_64. + * config/i386/i386.md (*tls_global_dynamic_64_largepic): Add + RDI clobber and use it to generate LEA. + (*tls_local_dynamic_64_): Likewise. + (*tls_local_dynamic_base_64_largepic): Likewise. + (@tls_local_dynamic_64_): Add a clobber. + +gcc/testsuite/ + + PR target/120908 + * gcc.target/i386/pr120908.c: New test. + +Signed-off-by: H.J. 
Lu +(cherry picked from commit d8d5e2a8031e74f08f61ccdd727476f97940c5a6) +--- + gcc/config/i386/i386.cc | 3 ++- + gcc/config/i386/i386.md | 18 +++++++++++------- + gcc/testsuite/gcc.target/i386/pr120908.c | 16 ++++++++++++++++ + 3 files changed, 29 insertions(+), 8 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr120908.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 94e29fa4321..edb51f2a9b2 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -12273,12 +12273,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, AX_REG); ++ rtx rdi = gen_rtx_REG (Pmode, DI_REG); + rtx_insn *insns; + rtx eqv; + + start_sequence (); + emit_call_insn +- (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); ++ (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi)); + insns = get_insns (); + end_sequence (); + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index def39d1a729..b02057c5f0f 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -21156,14 +21156,15 @@ (define_insn "*tls_global_dynamic_64_largepic" + (match_operand 4))) + (unspec:DI [(match_operand 1 "tls_symbolic_operand") + (reg:DI SP_REG)] +- UNSPEC_TLS_GD)] ++ UNSPEC_TLS_GD) ++ (clobber (match_operand:DI 5 "register_operand" "=D"))] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[3]) == CONST + && GET_CODE (XEXP (operands[3], 0)) == UNSPEC + && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" + { + output_asm_insn +- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); ++ ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands); + output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); + output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); + return "call\t{*%%rax|rax}"; +@@ -21231,11 +21232,12 @@ (define_insn "*tls_local_dynamic_base_64_" + (call:P + (mem:QI (match_operand 
1 "constant_call_address_operand" "Bz")) + (match_operand 2))) +- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] ++ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) ++ (clobber (match_operand:P 3 "register_operand" "=D"))] + "TARGET_64BIT" + { + output_asm_insn +- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); ++ ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands); + if (TARGET_SUN_TLS) + return "call\t%p1@plt"; + if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) +@@ -21251,14 +21253,15 @@ (define_insn "*tls_local_dynamic_base_64_largepic" + (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "immediate_operand" "i"))) + (match_operand 3))) +- (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)] ++ (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE) ++ (clobber (match_operand:DI 4 "register_operand" "=D"))] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[2]) == CONST + && GET_CODE (XEXP (operands[2], 0)) == UNSPEC + && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" + { + output_asm_insn +- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); ++ ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands); + output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); + output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); + return "call\t{*%%rax|rax}"; +@@ -21272,7 +21275,8 @@ (define_expand "@tls_local_dynamic_base_64_" + (call:P + (mem:QI (match_operand 1)) + (const_int 0))) +- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] ++ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) ++ (clobber (match_operand:P 2 "register_operand"))])] + "TARGET_64BIT" + "ix86_tls_descriptor_calls_expanded_in_cfun = true;") + +diff --git a/gcc/testsuite/gcc.target/i386/pr120908.c b/gcc/testsuite/gcc.target/i386/pr120908.c +new file mode 100644 +index 00000000000..10e5a46d8d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120908.c +@@ -0,0 +1,16 @@ ++/* { dg-do 
compile { target { lp64 && fpic } } } */ ++/* { dg-options "-O2 -fpic -mtls-dialect=gnu -mcmodel=large" } */ ++ ++extern __thread long bar1; ++long * ++foo1 (void) ++{ ++ return &bar1; ++} ++ ++static __thread long bar2; ++long * ++foo2 (void) ++{ ++ return &bar2; ++} +-- +2.31.1 + diff --git a/0028-x86-Enable-mov-mode-_-and-or-only-for-Oz.patch b/0028-x86-Enable-mov-mode-_-and-or-only-for-Oz.patch new file mode 100644 index 0000000..7c36066 --- /dev/null +++ b/0028-x86-Enable-mov-mode-_-and-or-only-for-Oz.patch @@ -0,0 +1,273 @@ +From b7e9ac6549ec5c73ea1a7ae7514ad1a049e801f1 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Sun, 25 May 2025 07:40:29 +0800 +Subject: [PATCH] x86: Enable *mov_(and|or) only for -Oz + +commit ef26c151c14a87177d46fd3d725e7f82e040e89f +Author: Roger Sayle +Date: Thu Dec 23 12:33:07 2021 +0000 + + x86: PR target/103773: Fix wrong-code with -Oz from pop to memory. + +added "*mov_and" and extended "*mov_or" to transform +"mov $0,mem" to the shorter "and $0,mem" and "mov $-1,mem" to the shorter +"or $-1,mem" for -Oz. But the new pattern: + +(define_insn "*mov_and" + [(set (match_operand:SWI248 0 "memory_operand" "=m") + (match_operand:SWI248 1 "const0_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "and{}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "") + (set_attr "length_immediate" "1")]) + +and the extended pattern: + +(define_insn "*mov_or" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") + (match_operand:SWI248 1 "constm1_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "or{}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "") + (set_attr "length_immediate" "1")]) + +aren't guarded for -Oz. As a result, "and $0,mem" and "or $-1,mem" are +generated without -Oz. + +1. Change *mov_and" to define_insn_and_split and split it to +"mov $0,mem" if not -Oz. +2. Change "*mov_or" to define_insn_and_split and split it to +"mov $-1,mem" if not -Oz. +3. 
Don't transform "mov $-1,reg" to "push $-1; pop reg" for -Oz since it +should be transformed to "or $-1,reg". + +gcc/ + + PR target/120427 + * config/i386/i386.md (*mov_and): Changed to + define_insn_and_split. Split it to "mov $0,mem" if not -Oz. + (*mov_or): Changed to define_insn_and_split. Split it + to "mov $-1,mem" if not -Oz. + (peephole2): Don't transform "mov $-1,reg" to "push $-1; pop reg" + for -Oz since it will be transformed to "or $-1,reg". + +gcc/testsuite/ + + PR target/120427 + * gcc.target/i386/cold-attribute-4.c: Compile with -Oz. + * gcc.target/i386/pr120427-1.c: New test. + * gcc.target/i386/pr120427-2.c: Likewise. + * gcc.target/i386/pr120427-3.c: Likewise. + * gcc.target/i386/pr120427-4.c: Likewise. + +Signed-off-by: H.J. Lu +(cherry picked from commit 4c80062d7b8c272e2e193b8074a8440dbb4fe588) +--- + gcc/config/i386/i386.md | 15 ++++++- + .../gcc.target/i386/cold-attribute-4.c | 2 +- + gcc/testsuite/gcc.target/i386/pr120427-1.c | 28 ++++++++++++ + gcc/testsuite/gcc.target/i386/pr120427-2.c | 28 ++++++++++++ + gcc/testsuite/gcc.target/i386/pr120427-3.c | 45 +++++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr120427-4.c | 6 +++ + 6 files changed, 121 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-1.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-2.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-3.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-4.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index b02057c5f0f..3ed4b2938b9 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -2332,22 +2332,32 @@ (define_insn "*mov_xor" + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +-(define_insn "*mov_and" ++;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem" ++;; otherwise. 
++(define_insn_and_split "*mov_and" + [(set (match_operand:SWI248 0 "memory_operand" "=m") + (match_operand:SWI248 1 "const0_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "and{}\t{%1, %0|%0, %1}" ++ "&& !(optimize_insn_for_size_p () && optimize_size > 1)" ++ [(set (match_dup 0) (match_dup 1))] ++ "" + [(set_attr "type" "alu1") + (set_attr "mode" "") + (set_attr "length_immediate" "1")]) + +-(define_insn "*mov_or" ++;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem" ++;; otherwise. ++(define_insn_and_split "*mov_or" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") + (match_operand:SWI248 1 "constm1_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "or{}\t{%1, %0|%0, %1}" ++ "&& !(optimize_insn_for_size_p () && optimize_size > 1)" ++ [(set (match_dup 0) (match_dup 1))] ++ "" + [(set_attr "type" "alu1") + (set_attr "mode" "") + (set_attr "length_immediate" "1")]) +@@ -2852,6 +2862,7 @@ (define_peephole2 + (match_operand:SWI248 1 "const_int_operand"))] + "optimize_insn_for_size_p () && optimize_size > 1 + && operands[1] != const0_rtx ++ && operands[1] != constm1_rtx + && IN_RANGE (INTVAL (operands[1]), -128, 127) + && !ix86_red_zone_used + && REGNO (operands[0]) != SP_REG" +diff --git a/gcc/testsuite/gcc.target/i386/cold-attribute-4.c b/gcc/testsuite/gcc.target/i386/cold-attribute-4.c +index 37a41e954da..e0808c53905 100644 +--- a/gcc/testsuite/gcc.target/i386/cold-attribute-4.c ++++ b/gcc/testsuite/gcc.target/i386/cold-attribute-4.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2" } */ ++/* { dg-options "-Oz" } */ + #include + + int +diff --git a/gcc/testsuite/gcc.target/i386/pr120427-1.c b/gcc/testsuite/gcc.target/i386/pr120427-1.c +new file mode 100644 +index 00000000000..7f1690e49b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120427-1.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=sapphirerapids" } */ ++/* { dg-final { scan-assembler-not 
"and\[lq\]?\[\\t \]+\\\$0, \[0-9\]*\\(" } } */ ++ ++struct __pthread_mutex_s ++{ ++ int __lock; ++ unsigned int __count; ++ int __owner; ++ unsigned int __nusers; ++ int __kind; ++ short __spins; ++ short __elision; ++ void *p[2]; ++}; ++typedef union ++{ ++ struct __pthread_mutex_s __data; ++ char __size[40]; ++ long int __align; ++} pthread_mutex_t; ++typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; ++void ++foo (__rtld_lock_recursive_t *lock, int i) ++{ ++ lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1, ++ 0, 0, { ((void *)0) , ((void *)0) } } }}; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr120427-2.c b/gcc/testsuite/gcc.target/i386/pr120427-2.c +new file mode 100644 +index 00000000000..a380c128ccb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120427-2.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mtune=sapphirerapids" } */ ++/* { dg-final { scan-assembler-not "or\[lq\]?\[\\t \]+\\\$-1, \[0-9\]*\\(" } } */ ++ ++struct __pthread_mutex_s ++{ ++ int __lock; ++ unsigned int __count; ++ int __owner; ++ unsigned int __nusers; ++ int __kind; ++ short __spins; ++ short __elision; ++ void *p[2]; ++}; ++typedef union ++{ ++ struct __pthread_mutex_s __data; ++ char __size[40]; ++ long int __align; ++} pthread_mutex_t; ++typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; ++void ++foo (__rtld_lock_recursive_t *lock, int i) ++{ ++ lock[i] = (__rtld_lock_recursive_t) {{ { -1, -1, -1, -1, 1, ++ -1, -1, { ((void *)-1) , ((void *)-1) } } }}; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr120427-3.c b/gcc/testsuite/gcc.target/i386/pr120427-3.c +new file mode 100644 +index 00000000000..951cb1f5ddb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120427-3.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++typedef int SItype __attribute__ ((mode (SI))); ++typedef unsigned int USItype __attribute__ ((mode (SI))); ++typedef unsigned int UDItype __attribute__ ((mode 
(DI))); ++typedef UDItype __attribute__ ((__may_alias__)) bar_t; ++ ++static inline __attribute__((__always_inline__)) SItype ++bar (const bar_t **p, SItype prec) ++{ ++ bar_t mslimb = 0; ++ SItype i = 20; ++ SItype n = ((USItype) prec) % 4; ++ if (n) ++ { ++ prec -= n; ++ if (prec == 0) ++ return 1; ++ mslimb = (*p)[i]; ++ } ++ while (mslimb == 0) ++ { ++ prec -= 4; ++ if (prec == 0) ++ return 1; ++ --i; ++ mslimb = (*p)[i]; ++ } ++ return prec; ++} ++UDItype ++foo (const bar_t *i, SItype iprec) ++{ ++ iprec = bar (&i, iprec); ++ USItype aiprec = iprec < 0 ? -iprec : iprec; ++ bar_t msb = *i; ++ UDItype mantissa = 0; ++ if (aiprec % 4) ++ msb &= ((bar_t) 1 << aiprec) - 1; ++ if (aiprec >= 54) ++ mantissa = (UDItype) msb << 32; ++ ++ return (mantissa ^ (UDItype) 0x20000000000000); ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr120427-4.c b/gcc/testsuite/gcc.target/i386/pr120427-4.c +new file mode 100644 +index 00000000000..2b453b787ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120427-4.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include "cold-attribute-4.c" ++ ++/* { dg-final { scan-assembler "movl" } } */ +-- +2.31.1 + diff --git a/0029-Eliminate-redundant-vpextrq-vpinsrq-when-move-TI-to-.patch b/0029-Eliminate-redundant-vpextrq-vpinsrq-when-move-TI-to-.patch new file mode 100644 index 0000000..2cea91e --- /dev/null +++ b/0029-Eliminate-redundant-vpextrq-vpinsrq-when-move-TI-to-.patch @@ -0,0 +1,95 @@ +From c8eb4fcd40c2faef5dadbaa83abfcc6e058ee9f6 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Tue, 29 Jul 2025 00:01:37 -0700 +Subject: [PATCH] Eliminate redundant vpextrq/vpinsrq when move TI to V4SI. + +r14-1902-g96c3539f2a3813 split TImode move with 2 DImode move, it's +supposed to optimize TImode in parameter/return since according to +psABI it's stored into 2 general registers. + +But when TImode is not in parameter/return, it could create redundancy +in the PR. + +The patch add a splitter to handle that. 
+ +.i.e. +(insn 10 9 14 2 (set (subreg:V2DI (reg:V4SI 98 [ ]) 0) + (vec_concat:V2DI (subreg:DI (reg:TI 101) 0) + (subreg:DI (reg:TI 101) 8))) + 8442 {vec_concatv2di} + (expr_list:REG_DEAD (reg:TI 101) + +gcc/ChangeLog: + + PR target/121274 + * config/i386/sse.md (*vec_concatv2di_0): Add a splitter + before it. + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/pr121274.c: New test. + +(cherry picked from commit 6a466839340dce3b596b3ae5ce85bd05a067ae00) +--- + gcc/config/i386/sse.md | 13 +++++++++++++ + gcc/testsuite/gcc.target/i386/pr121274.c | 24 ++++++++++++++++++++++++ + 2 files changed, 37 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/pr121274.c + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 67313d51853..477e18b112f 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -20793,6 +20793,19 @@ (define_insn "vec_concatv2di" + (const_string "orig"))) + (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + ++;; Eliminate redundancy caused by ++;; /* Special case TImode to 128-bit vector conversions via V2DI. */ ++;; in ix86_expand_vector_move ++ ++(define_split ++ [(set (match_operand:V2DI 0 "register_operand") ++ (vec_concat:V2DI ++ (subreg:DI (match_operand:TI 1 "register_operand") 0) ++ (subreg:DI (match_dup 1) 8)))] ++ "TARGET_SSE2 && ix86_pre_reload_split ()" ++ [(set (match_dup 0) ++ (subreg:V2DI (match_dup 1) 0))]) ++ + (define_insn "*vec_concatv2di_0" + [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x") + (vec_concat:V2DI +diff --git a/gcc/testsuite/gcc.target/i386/pr121274.c b/gcc/testsuite/gcc.target/i386/pr121274.c +new file mode 100644 +index 00000000000..16760cfcbac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121274.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target { ! 
ia32 } } } */ ++/* { dg-options "-march=x86-64-v4 -O2" } */ ++/* { dg-final { scan-assembler-not "vpextrq" } } */ ++/* { dg-final { scan-assembler-not "vpinsrq" } } */ ++ ++typedef int v16si __attribute__((vector_size(64))); ++typedef int v4si __attribute__((vector_size(16))); ++ ++v4si f(v16si x) ++{ ++ return __builtin_shufflevector(x, x, 0, 1, 2, 3); ++} ++ ++v4si g(v16si x) ++{ ++return __builtin_shufflevector(x, x, 4, 5, 6, 7); ++} ++ ++v4si f1(__int128 *x) ++{ ++ __int128 t = *x; ++ asm("":"+x"(t)); ++ return (v4si)t; ++} +-- +2.31.1 + diff --git a/0030-x86-Transform-to-pushq-1-popq-reg-for-Oz.patch b/0030-x86-Transform-to-pushq-1-popq-reg-for-Oz.patch new file mode 100644 index 0000000..2864097 --- /dev/null +++ b/0030-x86-Transform-to-pushq-1-popq-reg-for-Oz.patch @@ -0,0 +1,67 @@ +From 8b959ca27ee95ba32e5a587e0490f6cb49ef9760 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Tue, 29 Jul 2025 11:22:35 -0700 +Subject: [PATCH] x86: Transform to "pushq $-1; popq reg" for -Oz + +commit 4c80062d7b8c272e2e193b8074a8440dbb4fe588 +Author: H.J. Lu +Date: Sun May 25 07:40:29 2025 +0800 + + x86: Enable *mov_(and|or) only for -Oz + +disabled transformation from "movq $-1,reg" to "pushq $-1; popq reg" for +-Oz. But for legacy integer registers, the former is 4 bytes and the +latter is 3 bytes. Enable such transformation for -Oz. + +gcc/ + + PR target/120427 + * config/i386/i386.md (peephole2): Transform "movq $-1,reg" to + "pushq $-1; popq reg" for -Oz if reg is a legacy integer register. + +gcc/testsuite/ + + PR target/120427 + * gcc.target/i386/pr120427-5.c: New test. + +Signed-off-by: H.J. 
Lu +(cherry picked from commit 71dae74158d05b75e367629ce21da3f0a2945576) +--- + gcc/config/i386/i386.md | 3 ++- + gcc/testsuite/gcc.target/i386/pr120427-5.c | 10 ++++++++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-5.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index 3ed4b2938b9..b11cab30ee3 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -2862,7 +2862,8 @@ (define_peephole2 + (match_operand:SWI248 1 "const_int_operand"))] + "optimize_insn_for_size_p () && optimize_size > 1 + && operands[1] != const0_rtx +- && operands[1] != constm1_rtx ++ && (operands[1] != constm1_rtx ++ || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0]))) + && IN_RANGE (INTVAL (operands[1]), -128, 127) + && !ix86_red_zone_used + && REGNO (operands[0]) != SP_REG" +diff --git a/gcc/testsuite/gcc.target/i386/pr120427-5.c b/gcc/testsuite/gcc.target/i386/pr120427-5.c +new file mode 100644 +index 00000000000..7199aef7924 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr120427-5.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile { target { ! ia32 } } } */ ++/* { dg-options "-Oz" } */ ++ ++long long ++func1 (void) ++{ ++ return -1; ++} ++/* { dg-final { scan-assembler-times "pushq\[ \\t\]+\\\$-1" 1 } } */ ++/* { dg-final { scan-assembler-times "popq\[ \\t\]+%rax" 1 } } */ +-- +2.31.1 + diff --git a/0031-x86-Disallow-mtls-dialect-gnu-with-no_caller_saved_r.patch b/0031-x86-Disallow-mtls-dialect-gnu-with-no_caller_saved_r.patch new file mode 100644 index 0000000..151f036 --- /dev/null +++ b/0031-x86-Disallow-mtls-dialect-gnu-with-no_caller_saved_r.patch @@ -0,0 +1,179 @@ +From 5a11b0fa070298854cb59d659f8ebe0711184e87 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Thu, 24 Jul 2025 07:38:13 -0700 +Subject: [PATCH] x86: Disallow -mtls-dialect=gnu with + no_caller_saved_registers + +__tls_get_addr doesn't preserve vector registers. 
When a function +with no_caller_saved_registers attribute calls __tls_get_addr, YMM +and ZMM registers will be clobbered. Issue an error and suggest +-mtls-dialect=gnu2 in this case. + +gcc/ + + PR target/121208 + * config/i386/i386.cc (ix86_tls_get_addr): Issue an error for + -mtls-dialect=gnu with no_caller_saved_registers attribute and + suggest -mtls-dialect=gnu2. + +gcc/testsuite/ + + PR target/121208 + * gcc.target/i386/pr121208-1a.c: New test. + * gcc.target/i386/pr121208-1b.c: Likewise. + * gcc.target/i386/pr121208-2a.c: Likewise. + * gcc.target/i386/pr121208-2b.c: Likewise. + * gcc.target/i386/pr121208-3a.c: Likewise. + * gcc.target/i386/pr121208-3b.c: Likewise. + +Signed-off-by: H.J. Lu +(cherry picked from commit 5760ddbce26ff9c5c8851b6b2089ad65981d5078) +--- + gcc/config/i386/i386.cc | 22 +++++++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr121208-1a.c | 15 ++++++++++++++ + gcc/testsuite/gcc.target/i386/pr121208-1b.c | 4 ++++ + gcc/testsuite/gcc.target/i386/pr121208-2a.c | 17 ++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr121208-2b.c | 4 ++++ + gcc/testsuite/gcc.target/i386/pr121208-3a.c | 17 ++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr121208-3b.c | 4 ++++ + 7 files changed, 83 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-1a.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-1b.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-2a.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-2b.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-3a.c + create mode 100644 gcc/testsuite/gcc.target/i386/pr121208-3b.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index edb51f2a9b2..7ba977d5f81 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -12119,6 +12119,28 @@ static GTY(()) rtx ix86_tls_symbol; + static rtx + ix86_tls_get_addr (void) + { ++ if (cfun->machine->call_saved_registers ++ == TYPE_NO_CALLER_SAVED_REGISTERS) ++ { ++ /* 
__tls_get_addr doesn't preserve vector registers. When a ++ function with no_caller_saved_registers attribute calls ++ __tls_get_addr, YMM and ZMM registers will be clobbered. ++ Issue an error and suggest -mtls-dialect=gnu2 in this case. */ ++ if (cfun->machine->func_type == TYPE_NORMAL) ++ error (G_("%<-mtls-dialect=gnu2%> must be used with a function" ++ " with the %<no_caller_saved_registers%> attribute")); ++ else ++ error (cfun->machine->func_type == TYPE_EXCEPTION ++ ? G_("%<-mtls-dialect=gnu2%> must be used with an" ++ " exception service routine") ++ : G_("%<-mtls-dialect=gnu2%> must be used with an" ++ " interrupt service routine")); ++ /* Don't issue the same error twice. */ ++ cfun->machine->func_type = TYPE_NORMAL; ++ cfun->machine->call_saved_registers ++ = TYPE_DEFAULT_CALL_SAVED_REGISTERS; ++ } ++ + if (!ix86_tls_symbol) + { + const char *sym +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c +new file mode 100644 +index 00000000000..ac851cb50d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ ++ ++extern __thread int bar; ++extern void func (void); ++ ++__attribute__((no_caller_saved_registers)) ++void ++foo (int error) ++{ ++ bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ ++ if (error == 0) ++ func (); ++ bar = 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c +new file mode 100644 +index 00000000000..b97ac715c65 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c +@@ -0,0 +1,4 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ ++ ++#include "pr121208-1a.c" +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2a.c b/gcc/testsuite/gcc.target/i386/pr121208-2a.c +new file mode 100644 +index 00000000000..c1891ae322c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/i386/pr121208-2a.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ ++ ++typedef unsigned int uword_t __attribute__ ((mode (__word__))); ++extern __thread int bar; ++extern void func (void); ++ ++__attribute__((target("general-regs-only"))) ++__attribute__((interrupt)) ++void ++foo (void *frame, uword_t error) ++{ ++ bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ ++ if (error == 0) ++ func (); ++ bar = 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2b.c b/gcc/testsuite/gcc.target/i386/pr121208-2b.c +new file mode 100644 +index 00000000000..269b120f990 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121208-2b.c +@@ -0,0 +1,4 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ ++ ++#include "pr121208-2a.c" +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3a.c b/gcc/testsuite/gcc.target/i386/pr121208-3a.c +new file mode 100644 +index 00000000000..26fe6870155 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121208-3a.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ ++ ++typedef unsigned int uword_t __attribute__ ((mode (__word__))); ++extern __thread int bar; ++extern void func (void); ++ ++__attribute__((target("general-regs-only"))) ++__attribute__((interrupt)) ++void ++foo (void *frame) ++{ ++ bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ ++ if (frame == 0) ++ func (); ++ bar = 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3b.c b/gcc/testsuite/gcc.target/i386/pr121208-3b.c +new file mode 100644 +index 00000000000..b672d751d7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr121208-3b.c +@@ -0,0 +1,4 @@ ++/* { dg-do compile { target *-*-linux* } } */ ++/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ ++ ++#include "pr121208-3a.c" +-- +2.31.1 + diff --git 
a/0032-x86-Pass-mno-80387-to-compile-pr121208-1-a-b-.c.patch b/0032-x86-Pass-mno-80387-to-compile-pr121208-1-a-b-.c.patch new file mode 100644 index 0000000..1de7fa5 --- /dev/null +++ b/0032-x86-Pass-mno-80387-to-compile-pr121208-1-a-b-.c.patch @@ -0,0 +1,54 @@ +From 32960f98e639b4d0feed429b13c5f037261560d5 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Tue, 29 Jul 2025 09:11:34 -0700 +Subject: [PATCH] x86: Pass -mno-80387 to compile pr121208-1(a|b).c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Pass -mno-80387 to compile pr121208-1(a|b).c to silence + +.../pr121208-1a.c:11:1: sorry, unimplemented: 80387 instructions aren’t allowed in a function with the ‘no_caller_saved_registers’ attribute + +Partially backport the PR target/121540 fix + +9d7f45e9806 x86: Disallow MMX and 80387 in no_caller_saved_registers function + +to also add -mno-sse -mno-mmx. + + PR target/121208 + * gcc.target/i386/pr121208-1a.c (dg-options): Add + -mno-sse -mno-mmx -mno-80387. + * gcc.target/i386/pr121208-1b.c (dg-options): Likewise. + +Signed-off-by: H.J. 
Lu +(cherry picked from commit c6d1f58da7eb72e8bac307d342e4655012b36a89) +--- + gcc/testsuite/gcc.target/i386/pr121208-1a.c | 2 +- + gcc/testsuite/gcc.target/i386/pr121208-1b.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c +index ac851cb50d8..db8d3481622 100644 +--- a/gcc/testsuite/gcc.target/i386/pr121208-1a.c ++++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c +@@ -1,5 +1,5 @@ + /* { dg-do compile { target *-*-linux* } } */ +-/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ ++/* { dg-options "-O2 -fPIC -mno-sse -mno-mmx -mno-80387 -mtls-dialect=gnu" } */ + + extern __thread int bar; + extern void func (void); +diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c +index b97ac715c65..1591f7d3fed 100644 +--- a/gcc/testsuite/gcc.target/i386/pr121208-1b.c ++++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c +@@ -1,4 +1,4 @@ + /* { dg-do compile { target *-*-linux* } } */ +-/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ ++/* { dg-options "-O2 -fPIC -mno-sse -mno-mmx -mno-80387 -mtls-dialect=gnu2" } */ + + #include "pr121208-1a.c" +-- +2.31.1 + diff --git a/gcc.spec b/gcc.spec index 95d5df4..264c6b5 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%define anolis_release 1 +%define anolis_release 2 %{?scl_package:%global scl gcc-toolset-14} %global scl_prefix gcc-toolset-14- @@ -337,6 +337,16 @@ Patch3039: 0021-LoongArch-Change-OSDIR-for-distribution.patch Patch3040: 0022-LoongArch-support-nonshared-extfloat.diff Patch3041: 0023-LoongArch-compat-for-libstdcxx-nonshared.diff +Patch3042: 0024-i386-Remove-CLDEMOTE-for-clients.patch +Patch3043: 0025-i386-Remove-KEYLOCKER-related-feature-since-Panther-.patch +Patch3044: 0026-x86-64-Add-RDI-clobber-to-tls_global_dynamic_64-patt.patch +Patch3045: 0027-x86-64-Add-RDI-clobber-to-64-bit-dynamic-TLS-pattern.patch +Patch3046: 
0028-x86-Enable-mov-mode-_-and-or-only-for-Oz.patch +Patch3047: 0029-Eliminate-redundant-vpextrq-vpinsrq-when-move-TI-to-.patch +Patch3048: 0030-x86-Transform-to-pushq-1-popq-reg-for-Oz.patch +Patch3049: 0031-x86-Disallow-mtls-dialect-gnu-with-no_caller_saved_r.patch +Patch3050: 0032-x86-Pass-mno-80387-to-compile-pr121208-1-a-b-.c.patch + %global nonsharedver 110 %if 0%{?scl:1} %global _gnu %{nil} @@ -704,6 +714,15 @@ touch -r isl-0.24/m4/ax_prog_cxx_for_build.m4 isl-0.24/m4/ax_prog_cc_for_build.m %patch -P3040 -p1 -b .dts-test-40~ %patch -P3041 -p1 -b .dts-test-41~ +%patch -P3042 -p1 -b .dts-test-42~ +%patch -P3043 -p1 -b .dts-test-43~ +%patch -P3044 -p1 -b .dts-test-44~ +%patch -P3045 -p1 -b .dts-test-45~ +%patch -P3046 -p1 -b .dts-test-46~ +%patch -P3047 -p1 -b .dts-test-47~ +%patch -P3048 -p1 -b .dts-test-48~ +%patch -P3049 -p1 -b .dts-test-49~ +%patch -P3050 -p1 -b .dts-test-50~ find gcc/testsuite -name \*.pr96939~ | xargs rm -f @@ -2747,6 +2766,9 @@ fi %endif %changelog +* Tue Sep 30 2025 Hu, Lin 14.3.0-2 +- x86: Backport patches from releases/gcc-14 + * Tue Jun 17 2025 Weisson 14.3.0-1 - update to gcc 14.3.0. -- Gitee