From d9692bfdbafe8b787396ab0e2599fa7cff8eb064 Mon Sep 17 00:00:00 2001 From: fengpengbo Date: Tue, 29 Jul 2025 11:32:26 +0800 Subject: [PATCH] backport-add-riscv64-assembly-support-and-features Signed-off-by: liuqingtao --- ...iscv64-assembly-support-and-features.patch | 1620 +++++++++++++++++ openssl.spec | 9 +- 2 files changed, 1628 insertions(+), 1 deletion(-) create mode 100644 backport-add-riscv64-assembly-support-and-features.patch diff --git a/backport-add-riscv64-assembly-support-and-features.patch b/backport-add-riscv64-assembly-support-and-features.patch new file mode 100644 index 0000000..7ebc533 --- /dev/null +++ b/backport-add-riscv64-assembly-support-and-features.patch @@ -0,0 +1,1620 @@ +From db72a0088cba893ef39c6b1c9b971c4b4e30382d Mon Sep 17 00:00:00 2001 +From: fengpengbo +Date: Sat, 26 Jul 2025 15:03:57 +0800 +Subject: [PATCH] add RISC-V assembly support and features + +Reference: https://github.com/openssl/openssl/commit/8448432a3be6cd5eb2576594c742e3d54d92f78a +Add basic RISC-V cpuid and OPENSSL_riscvcap +Reviewed-by: Philipp Tomsich +Signed-off-by: Henry Brausen + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +(Merged from #17640) + +Reference: https://github.com/openssl/openssl/commit/cb2764f2a8165421dc5ab52159af99cbf766fa2c +Add riscv64 asm_arch to linux64-riscv64 target + +Reviewed-by: Philipp Tomsich +Signed-off-by: Henry Brausen + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +(Merged from #18275) + +Reference: https://github.com/openssl/openssl/blob/master/crypto/perlasm/riscv.pm + +Signed-off-by: liuqingtao +--- + Configurations/10-main.conf | 1 + + crypto/build.info | 3 + + crypto/perlasm/riscv.pm | 1076 +++++++++++++++++++++++++++++++++ + crypto/riscv64cpuid.pl | 105 ++++ + crypto/riscvcap.c | 145 +++++ + include/crypto/riscv_arch.def | 61 ++ + include/crypto/riscv_arch.h | 125 ++++ + 7 files changed, 1516 insertions(+) + create mode 100644 crypto/perlasm/riscv.pm + create mode 100644 crypto/riscv64cpuid.pl + create mode 100644 
crypto/riscvcap.c + create mode 100644 include/crypto/riscv_arch.def + create mode 100644 include/crypto/riscv_arch.h + +diff --git a/Configurations/10-main.conf b/Configurations/10-main.conf +index 280a75b..09d5e89 100644 +--- a/Configurations/10-main.conf ++++ b/Configurations/10-main.conf +@@ -819,6 +819,7 @@ my %targets = ( + "linux64-riscv64" => { + inherit_from => [ "linux-generic64"], + perlasm_scheme => "linux64", ++ asm_arch => 'riscv64', + }, + + # loongarch64 below refers to contemporary LoongArch Architecture +diff --git a/crypto/build.info b/crypto/build.info +index c04db55..f1dd411 100644 +--- a/crypto/build.info ++++ b/crypto/build.info +@@ -51,6 +51,8 @@ IF[{- !$disabled{asm} && $config{processor} ne '386' -}] + + $CPUIDASM_c64xplus=c64xpluscpuid.s + ++ $CPUIDASM_riscv64=riscvcap.c riscv64cpuid.s ++ + # Now that we have defined all the arch specific variables, use the + # appropriate one, and define the appropriate macros + IF[$CPUIDASM_{- $target{asm_arch} -}] +@@ -130,6 +132,7 @@ GENERATE[armv4cpuid.S]=armv4cpuid.pl + INCLUDE[armv4cpuid.o]=. + GENERATE[s390xcpuid.S]=s390xcpuid.pl + INCLUDE[s390xcpuid.o]=. ++GENERATE[riscv64cpuid.s]=riscv64cpuid.pl + + IF[{- $config{target} =~ /^(?:Cygwin|mingw|VC-|BC-)/ -}] + SHARED_SOURCE[../libcrypto]=dllmain.c +diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm +new file mode 100644 +index 0000000..69a47c5 +--- /dev/null ++++ b/crypto/perlasm/riscv.pm +@@ -0,0 +1,1076 @@ ++#! /usr/bin/env perl ++# This file is dual-licensed, meaning that you can use it under your ++# choice of either of the following two licenses: ++# ++# Copyright 2023-2025 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). 
You can obtain ++# a copy in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++# ++# or ++# ++# Copyright (c) 2023, Christoph Müllner ++# Copyright (c) 2023, Jerry Shih ++# Copyright (c) 2023, Phoebe Chen ++# Copyright (c) 2025, Julian Zhu ++# All rights reserved. ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions ++# are met: ++# 1. Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# 2. Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in the ++# documentation and/or other materials provided with the distribution. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++use strict; ++use warnings; ++ ++# Set $have_stacktrace to 1 if we have Devel::StackTrace ++my $have_stacktrace = 0; ++if (eval {require Devel::StackTrace;1;}) { ++ $have_stacktrace = 1; ++} ++ ++my @regs = map("x$_",(0..31)); ++# Mapping from the RISC-V psABI ABI mnemonic names to the register number. 
++my @regaliases = ('zero','ra','sp','gp','tp','t0','t1','t2','s0','s1', ++ map("a$_",(0..7)), ++ map("s$_",(2..11)), ++ map("t$_",(3..6)) ++); ++ ++my %reglookup; ++@reglookup{@regs} = @regs; ++@reglookup{@regaliases} = @regs; ++ ++# Takes a register name or ABI alias (matched case-insensitively) and returns its ++# register index from 0 to 31; dies (with a stack trace when available) if unknown ++sub read_reg { ++ my $reg = lc shift; ++ if (!exists($reglookup{$reg})) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unknown register ".$reg."\n".$trace); ++ } ++ my $regstr = $reglookup{$reg}; ++ if (!($regstr =~ /^x([0-9]+)$/)) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Could not process register ".$reg."\n".$trace); ++ } ++ return $1; ++} ++ ++# Read the SEW setting ("e8", "e16", "e32" or "e64") and convert it to the vsew encoding (0 to 3). ++sub read_sew { ++ my $sew_setting = shift; ++ ++ if ($sew_setting eq "e8") { ++ return 0; ++ } elsif ($sew_setting eq "e16") { ++ return 1; ++ } elsif ($sew_setting eq "e32") { ++ return 2; ++ } elsif ($sew_setting eq "e64") { ++ return 3; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported SEW setting:".$sew_setting."\n".$trace); ++ } ++} ++ ++# Read the LMUL setting ("mf8", "mf4", "mf2", "m1", "m2", "m4" or "m8") and convert it to the vlmul encoding.
++sub read_lmul { ++ my $lmul_setting = shift; ++ ++ if ($lmul_setting eq "mf8") { ++ return 5; ++ } elsif ($lmul_setting eq "mf4") { ++ return 6; ++ } elsif ($lmul_setting eq "mf2") { ++ return 7; ++ } elsif ($lmul_setting eq "m1") { ++ return 0; ++ } elsif ($lmul_setting eq "m2") { ++ return 1; ++ } elsif ($lmul_setting eq "m4") { ++ return 2; ++ } elsif ($lmul_setting eq "m8") { ++ return 3; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported LMUL setting:".$lmul_setting."\n".$trace); ++ } ++} ++ ++# Read the tail policy setting ("ta" gives 1, "tu" gives 0) and convert it to the vta encoding. ++sub read_tail_policy { ++ my $tail_setting = shift; ++ ++ if ($tail_setting eq "ta") { ++ return 1; ++ } elsif ($tail_setting eq "tu") { ++ return 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported tail policy setting:".$tail_setting."\n".$trace); ++ } ++} ++ ++# Read the mask policy setting ("ma" gives 1, "mu" gives 0) and convert it to the vma encoding. ++sub read_mask_policy { ++ my $mask_setting = shift; ++ ++ if ($mask_setting eq "ma") { ++ return 1; ++ } elsif ($mask_setting eq "mu") { ++ return 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported mask policy setting:".$mask_setting."\n".$trace); ++ } ++} ++ ++my @vregs = map("v$_",(0..31)); ++my %vreglookup; ++@vreglookup{@vregs} = @vregs; ++ ++sub read_vreg { ++ my $vreg = lc shift; ++ if (!exists($vreglookup{$vreg})) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unknown vector register ".$vreg."\n".$trace); ++ } ++ if (!($vreg =~ /^v([0-9]+)$/)) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Could not process vector register ".$vreg."\n".$trace); ++ } ++ return $1; ++} ++ ++# Read the optional mask operand and convert it to the vm bit: 1 when omitted, 0 when it is v0; any other register dies.
++sub read_mask_vreg { ++ my $vreg = shift; ++ # With no mask operand the instruction is unmasked (vm bit set). ++ my $mask_bit = 1; ++ ++ if (defined($vreg)) { ++ my $reg_id = read_vreg $vreg; ++ if ($reg_id == 0) { ++ $mask_bit = 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("The ".$vreg." is not the mask register v0.\n".$trace); ++ } ++ } ++ return $mask_bit; ++} ++ ++# Helper functions ++ ++sub brev8_rv64i { ++ # brev8 without `brev8` instruction (only in Zbkb) ++ # Reverses the bits within each byte of $val in place; needs two scratch registers plus one register for the address of Lbrev8_const (a label not defined in this module) ++ my $val = shift; ++ my $t0 = shift; ++ my $t1 = shift; ++ my $brev8_const = shift; ++ my $seq = <<___; ++ la $brev8_const, Lbrev8_const ++ ++ ld $t0, 0($brev8_const) # 0xAAAAAAAAAAAAAAAA ++ slli $t1, $val, 1 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 1 ++ or $val, $t1, $val ++ ++ ld $t0, 8($brev8_const) # 0xCCCCCCCCCCCCCCCC ++ slli $t1, $val, 2 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 2 ++ or $val, $t1, $val ++ ++ ld $t0, 16($brev8_const) # 0xF0F0F0F0F0F0F0F0 ++ slli $t1, $val, 4 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 4 ++ or $val, $t1, $val ++___ ++ return $seq; ++} ++ ++sub sd_rev8_rv64i { ++ # rev8 without `rev8` instruction (only in Zbb or Zbkb) ++ # Stores the 64-bit $val byte-reversed at $off($addr), one byte per sb; needs one scratch register ++ my $val = shift; ++ my $addr = shift; ++ my $off = shift; ++ my $tmp = shift; ++ my $off0 = ($off + 0); ++ my $off1 = ($off + 1); ++ my $off2 = ($off + 2); ++ my $off3 = ($off + 3); ++ my $off4 = ($off + 4); ++ my $off5 = ($off + 5); ++ my $off6 = ($off + 6); ++ my $off7 = ($off + 7); ++ my $seq = <<___; ++ sb $val, $off7($addr) ++ srli $tmp, $val, 8 ++ sb $tmp, $off6($addr) ++ srli $tmp, $val, 16 ++ sb $tmp, $off5($addr) ++ srli $tmp, $val, 24 ++ sb $tmp, $off4($addr) ++ srli $tmp, $val, 32 ++ sb $tmp, $off3($addr) ++ srli $tmp, $val, 40 ++ sb $tmp, $off2($addr) ++ srli $tmp, $val, 48 ++ sb $tmp,
$off1($addr) ++ srli $tmp, $val, 56 ++ sb $tmp, $off0($addr) ++___ ++ return $seq; ++} ++ ++sub roriw_rv64i { ++ my ( ++ $rd, $rs, $tmp1, $tmp2, $imm, ++ ) = @_; ++ my $code=<<___; ++ srliw $tmp1, $rs, $imm ++ slliw $tmp2, $rs, (32-$imm) ++ or $rd, $tmp1, $tmp2 ++___ ++ return $code; ++} ++ ++# Scalar crypto instructions ++ ++sub aes64ds { ++ # Encoding for aes64ds rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011101_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64dsm { ++ # Encoding for aes64dsm rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011111_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64es { ++ # Encoding for aes64es rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011001_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64esm { ++ # Encoding for aes64esm rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011011_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64im { ++ # Encoding for aes64im rd, rs1 instruction on RV64 ++ # XXXXXXXXXXXX_ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b001100000000_00000_001_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($rd << 7)); ++} ++
++sub aes64ks1i { ++ # Encoding for aes64ks1i rd, rs1, rnum instruction on RV64 ++ # XXXXXXXX_rnum_ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b00110001_0000_00000_001_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rnum = shift; ++ return ".word ".($template | ($rnum << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64ks2 { ++ # Encoding for aes64ks2 rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0111111_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub brev8 { ++ # brev8 rd, rs ++ my $template = 0b011010000111_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs = read_reg shift; ++ return ".word ".($template | ($rs << 15) | ($rd << 7)); ++} ++ ++sub clmul { ++ # Encoding for clmul rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_001_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub clmulh { ++ # Encoding for clmulh rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_011_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub rev8 { ++ # Encoding for rev8 rd, rs instruction on RV64 ++ # XXXXXXXXXXXXX_ rs _XXX_ rd _XXXXXXX ++ my $template = 0b011010111000_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs = read_reg shift; ++ return ".word ".($template | ($rs << 15) | ($rd << 7)); ++} ++ ++sub rori { ++ # Encoding for rori rd, rs1, shamt instruction on RV64 ++ # XXXXXXX_ shamt _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 
0b0110000_00000_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $shamt = shift; ++ return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub roriw { ++ # Encoding for roriw rd, rs1, shamt instruction on RV64 ++ # XXXXXXX_ shamt _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0110000_00000_00000_101_00000_0011011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $shamt = shift; ++ return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub maxu { ++ # Encoding for maxu rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_111_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub minu { ++ # Encoding for minu rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_101_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++# Vector instructions ++ ++sub vadd_vv { ++ # vadd.vv vd, vs2, vs1, vm ++ my $template = 0b000000_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vadd_vx { ++ # vadd.vx vd, vs2, rs1, vm ++ my $template = 0b000000_0_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsub_vv { ++ # vsub.vv vd, vs2, vs1, vm ++ my $template = 
0b000010_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vsub_vx { ++ # vsub.vx vd, vs2, rs1, vm ++ my $template = 0b000010_0_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vid_v { ++ # vid.v vd ++ my $template = 0b0101001_00000_10001_010_00000_1010111; ++ my $vd = read_vreg shift; ++ return ".word ".($template | ($vd << 7)); ++} ++ ++sub viota_m { ++ # viota.m vd, vs2, vm ++ my $template = 0b010100_0_00000_10000_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vle8_v { ++ # vle8.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_000_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vle32_v { ++ # vle32.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_110_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vle64_v { ++ # vle64.v vd, (rs1) ++ my $template = 0b0000001_00000_00000_111_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vlse32_v { ++ # vlse32.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ 
return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vlsseg_nf_e32_v { ++ # vlssege32.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0000111; ++ my $nf = shift; ++ $nf -= 1; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($nf << 29) | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vlse64_v { ++ # vlse64.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_111_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vluxei8_v { ++ # vluxei8.v vd, (rs1), vs2, vm ++ my $template = 0b000001_0_00000_00000_000_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vmerge_vim { ++ # vmerge.vim vd, vs2, imm, v0 ++ my $template = 0b0101110_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($imm << 15) | ($vd << 7)); ++} ++ ++sub vmerge_vvm { ++ # vmerge.vvm vd, vs2, vs1, v0 ++ my $template = 0b0101110_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)) ++} ++ ++sub vmseq_vi { ++ # vmseq.vi vd, vs2, imm (the source vector, held in $vs1 below, is encoded in bits 24:20) ++ my $template = 0b0110001_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($vs1 << 20) | ($imm << 15) | ($vd << 7)) ++} ++ ++sub vmsgtu_vx { ++ # vmsgtu.vx vd, vs2, rs1, vm ++ my $template = 0b011110_0_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my
$rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)) ++} ++ ++sub vmv_v_i { ++ # vmv.v.i vd, imm ++ my $template = 0b0101111_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($imm << 15) | ($vd << 7)); ++} ++ ++sub vmv_v_x { ++ # vmv.v.x vd, rs1 ++ my $template = 0b0101111_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vmv_v_v { ++ # vmv.v.v vd, vs1 ++ my $template = 0b0101111_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vor_vv { ++ # vor.vv vd, vs2, vs1 ++ my $template = 0b0010101_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vor_vv_v0t { ++ # vor.vv vd, vs2, vs1, v0.t ++ my $template = 0b0010100_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vse8_v { ++ # vse8.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_000_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vse32_v { ++ # vse32.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_110_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vssseg_nf_e32_v { ++ # vsssege32.v vs3, (rs1), rs2 ++ my $template = 
0b0000101_00000_00000_110_00000_0100111; ++ my $nf = shift; ++ $nf -= 1; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($nf << 29) | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vsuxei8_v { ++ # vsuxei8.v vs3, (rs1), vs2, vm ++ my $template = 0b000001_0_00000_00000_000_00000_0100111; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vse64_v { ++ # vse64.v vd, (rs1) ++ my $template = 0b0000001_00000_00000_111_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsetivli__x0_2_e64_m1_tu_mu { ++ # vsetivli x0, 2, e64, m1, tu, mu ++ return ".word 0xc1817057"; ++} ++ ++sub vsetivli__x0_4_e32_m1_tu_mu { ++ # vsetivli x0, 4, e32, m1, tu, mu ++ return ".word 0xc1027057"; ++} ++ ++sub vsetivli__x0_4_e64_m1_tu_mu { ++ # vsetivli x0, 4, e64, m1, tu, mu ++ return ".word 0xc1827057"; ++} ++ ++sub vsetivli__x0_8_e32_m1_tu_mu { ++ # vsetivli x0, 8, e32, m1, tu, mu ++ return ".word 0xc1047057"; ++} ++ ++sub vsetvli { ++ # vsetvli rd, rs1, vtypei ++ my $template = 0b0_00000000000_00000_111_00000_1010111; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $sew = read_sew shift; ++ my $lmul = read_lmul shift; ++ my $tail_policy = read_tail_policy shift; ++ my $mask_policy = read_mask_policy shift; ++ my $vtypei = ($mask_policy << 7) | ($tail_policy << 6) | ($sew << 3) | $lmul; ++ ++ return ".word ".($template | ($vtypei << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub vsetivli { ++ # vsetivli rd, uimm, vtypei ++ my $template = 0b11_0000000000_00000_111_00000_1010111; ++ my $rd = read_reg shift; ++ my $uimm = shift; ++ my $sew = read_sew shift; ++ my $lmul = read_lmul shift; ++ my $tail_policy = read_tail_policy shift; ++ my
$mask_policy = read_mask_policy shift; ++ my $vtypei = ($mask_policy << 7) | ($tail_policy << 6) | ($sew << 3) | $lmul; ++ ++ return ".word ".($template | ($vtypei << 20) | ($uimm << 15) | ($rd << 7)); ++} ++ ++sub vslidedown_vi { ++ # vslidedown.vi vd, vs2, uimm ++ my $template = 0b0011111_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vslidedown_vx { ++ # vslidedown.vx vd, vs2, rs1 ++ my $template = 0b0011111_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vslideup_vi_v0t { ++ # vslideup.vi vd, vs2, uimm, v0.t ++ my $template = 0b0011100_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vslideup_vi { ++ # vslideup.vi vd, vs2, uimm ++ my $template = 0b0011101_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsll_vi { ++ # vsll.vi vd, vs2, uimm (always unmasked: no mask operand is read) ++ my $template = 0b1001011_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsrl_vi { ++ # vsrl.vi vd, vs2, uimm (always unmasked: no mask operand is read) ++ my $template = 0b1010001_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsrl_vx { ++ # vsrl.vx vd, vs2, rs1 ++ my $template = 0b1010001_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my
$vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsse32_v { ++ # vsse32.v vs3, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0100111; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vsse64_v { ++ # vsse64.v vs3, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_111_00000_0100111; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vxor_vv_v0t { ++ # vxor.vv vd, vs2, vs1, v0.t ++ my $template = 0b0010110_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vxor_vv { ++ # vxor.vv vd, vs2, vs1 ++ my $template = 0b0010111_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vzext_vf2 { ++ # vzext.vf2 vd, vs2, vm ++ my $template = 0b010010_0_00000_00110_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++# Vector crypto instructions ++ ++## Zvbb and Zvkb instructions ++## ++## vandn (also in zvkb) ++## vbrev ++## vbrev8 (also in zvkb) ++## vrev8 (also in zvkb) ++## vclz ++## vctz ++## vcpop ++## vrol (also in zvkb) ++## vror (also in zvkb) ++## vwsll ++ ++sub vbrev8_v { ++ # vbrev8.v vd, vs2, vm ++ my $template = 0b010010_0_00000_01000_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word
".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vrev8_v { ++ # vrev8.v vd, vs2, vm ++ my $template = 0b010010_0_00000_01001_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vror_vi { ++ # vror.vi vd, vs2, uimm ++ my $template = 0b01010_0_1_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ my $uimm_i5 = $uimm >> 5; ++ my $uimm_i4_0 = $uimm & 0b11111; ++ ++ return ".word ".($template | ($uimm_i5 << 26) | ($vs2 << 20) | ($uimm_i4_0 << 15) | ($vd << 7)); ++} ++ ++sub vwsll_vv { ++ # vwsll.vv vd, vs2, vs1, vm ++ my $template = 0b110101_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++## Zvbc instructions ++ ++sub vclmulh_vx { ++ # vclmulh.vx vd, vs2, rs1 ++ my $template = 0b0011011_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vclmul_vx_v0t { ++ # vclmul.vx vd, vs2, rs1, v0.t ++ my $template = 0b0011000_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vclmul_vx { ++ # vclmul.vx vd, vs2, rs1 ++ my $template = 0b0011001_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++## Zvkg instructions ++ ++sub vghsh_vv { ++ # vghsh.vv vd, vs2, vs1 ++ my $template = 
0b1011001_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vgmul_vv { ++ # vgmul.vv vd, vs2 ++ my $template = 0b1010001_00000_10001_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++## Zvkned instructions ++ ++sub vaesdf_vs { ++ # vaesdf.vs vd, vs2 ++ my $template = 0b101001_1_00000_00001_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesdm_vs { ++ # vaesdm.vs vd, vs2 ++ my $template = 0b101001_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesef_vs { ++ # vaesef.vs vd, vs2 ++ my $template = 0b101001_1_00000_00011_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesem_vs { ++ # vaesem.vs vd, vs2 ++ my $template = 0b101001_1_00000_00010_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaeskf1_vi { ++ # vaeskf1.vi vd, vs2, uimm ++ my $template = 0b100010_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($uimm << 15) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaeskf2_vi { ++ # vaeskf2.vi vd, vs2, uimm ++ my $template = 0b101010_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vaesz_vs { ++ # vaesz.vs vd, vs2 ++ my $template =
0b101001_1_00000_00111_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++## Zvknha and Zvknhb instructions ++ ++sub vsha2ms_vv { ++ # vsha2ms.vv vd, vs2, vs1 ++ my $template = 0b1011011_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vsha2ch_vv { ++ # vsha2ch.vv vd, vs2, vs1 ++ my $template = 0b1011101_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vsha2cl_vv { ++ # vsha2cl.vv vd, vs2, vs1 ++ my $template = 0b1011111_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++## Zvksed instructions ++ ++sub vsm4k_vi { ++ # vsm4k.vi vd, vs2, uimm ++ my $template = 0b1000011_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsm4r_vs { ++ # vsm4r.vs vd, vs2 ++ my $template = 0b1010011_00000_10000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++## Zvksh instructions ++ ++sub vsm3c_vi { ++ # vsm3c.vi vd, vs2, uimm ++ my $template = 0b1010111_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsm3me_vv { ++ # vsm3me.vv vd, vs2, vs1 ++ my $template = 0b1000001_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = 
read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++1; +diff --git a/crypto/riscv64cpuid.pl b/crypto/riscv64cpuid.pl +new file mode 100644 +index 0000000..5dcdc5c +--- /dev/null ++++ b/crypto/riscv64cpuid.pl +@@ -0,0 +1,105 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++open(STDOUT, '>', $output) or die "can't open $output: $!" if $output; ++ ++{ ++my ($in_a,$in_b,$len,$x,$temp1,$temp2) = ('a0','a1','a2','t0','t1','t2'); ++$code.=<<___; ++################################################################################ ++# int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len) ++################################################################################ ++.text ++.balign 16 ++.globl CRYPTO_memcmp ++.type CRYPTO_memcmp,\@function ++CRYPTO_memcmp: ++ li $x,0 ++ beqz $len,2f # len == 0 ++1: ++ lbu $temp1,0($in_a) ++ lbu $temp2,0($in_b) ++ addi $in_a,$in_a,1 ++ addi $in_b,$in_b,1 ++ addi $len,$len,-1 ++ xor $temp1,$temp1,$temp2 ++ or $x,$x,$temp1 ++ bgtz $len,1b ++2: ++ mv a0,$x ++ ret ++___ ++} ++{ ++my ($ptr,$len,$temp1,$temp2) = ('a0','a1','t0','t1'); ++$code.=<<___; ++################################################################################ ++# void OPENSSL_cleanse(void *ptr, size_t len) ++################################################################################ ++.text ++.balign 16 ++.globl OPENSSL_cleanse 
++.type OPENSSL_cleanse,\@function ++OPENSSL_cleanse: ++ beqz $len,2f # len == 0, return ++ srli $temp1,$len,4 ++ bnez $temp1,3f # len > 15 ++ ++1: # Store <= 15 individual bytes ++ sb x0,0($ptr) ++ addi $ptr,$ptr,1 ++ addi $len,$len,-1 ++ bnez $len,1b ++2: ++ ret ++ ++3: # Store individual bytes until we are aligned ++ andi $temp1,$ptr,0x7 ++ beqz $temp1,4f ++ sb x0,0($ptr) ++ addi $ptr,$ptr,1 ++ addi $len,$len,-1 ++ j 3b ++ ++4: # Store aligned dwords ++ li $temp2,8 ++4: ++ sd x0,0($ptr) ++ addi $ptr,$ptr,8 ++ addi $len,$len,-8 ++ bge $len,$temp2,4b # if len>=8 loop ++ bnez $len,1b # if len<8 and len != 0, store remaining bytes ++ ret ++___ ++} ++ ++{ ++my ($ret) = ('a0'); ++$code .= <<___; ++################################################################################ ++# size_t riscv_vlen_asm(void) ++# Return VLEN (i.e. the length of a vector register in bits). ++.p2align 3 ++.globl riscv_vlen_asm ++.type riscv_vlen_asm,\@function ++riscv_vlen_asm: ++ csrr $ret, vlenb ++ slli $ret, $ret, 3 ++ ret ++.size riscv_vlen_asm,.-riscv_vlen_asm ++___ ++} ++ ++print $code; ++close STDOUT or die "error closing STDOUT: $!"; +diff --git a/crypto/riscvcap.c b/crypto/riscvcap.c +new file mode 100644 +index 0000000..0c44b93 +--- /dev/null ++++ b/crypto/riscvcap.c +@@ -0,0 +1,145 @@ ++/* ++ * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. 
You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++#include <stdlib.h> ++#include <string.h> ++#include <ctype.h> ++#include <stdint.h> ++#include <openssl/crypto.h> ++#include "internal/cryptlib.h" ++ ++#define OPENSSL_RISCVCAP_IMPL ++#include "crypto/riscv_arch.h" ++ ++#ifdef OSSL_RISCV_HWPROBE ++# include <unistd.h> ++# include <sys/syscall.h> ++# include <asm/hwprobe.h> ++# include <sys/auxv.h> ++#endif ++ ++extern size_t riscv_vlen_asm(void); ++ ++static void parse_env(const char *envstr); ++static void strtoupper(char *str); ++ ++static size_t vlen = 0; ++ ++#ifdef OSSL_RISCV_HWPROBE ++unsigned int OPENSSL_riscv_hwcap_P = 0; ++#endif ++ ++uint32_t OPENSSL_rdtsc(void) ++{ ++ return 0; ++} ++ ++size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) ++{ ++ return 0; ++} ++ ++size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) ++{ ++ return 0; ++} ++ ++static void strtoupper(char *str) ++{ ++ for (char *x = str; *x; ++x) ++ *x = toupper((unsigned char)*x); ++} ++ ++/* parse_env() parses a RISC-V architecture string. An example of such a string ++ * is "rv64gc_zba_zbb_zbc_zbs". Currently, the rv64gc part is ignored ++ * and we simply search for "_[extension]" in the arch string to see if we ++ * should enable a given extension. 
++ */ ++#define BUFLEN 256 ++static void parse_env(const char *envstr) ++{ ++ char envstrupper[BUFLEN]; ++ char buf[BUFLEN]; ++ ++ /* Convert env str to all uppercase */ ++ OPENSSL_strlcpy(envstrupper, envstr, sizeof(envstrupper)); ++ strtoupper(envstrupper); ++ ++ for (size_t i = 0; i < kRISCVNumCaps; ++i) { ++ /* Prefix capability with underscore in preparation for search */ ++ BIO_snprintf(buf, BUFLEN, "_%s", RISCV_capabilities[i].name); ++ if (strstr(envstrupper, buf) != NULL) { ++ /* Match, set relevant bit in OPENSSL_riscvcap_P[] (unsigned shift: bit 31 is valid) */ ++ OPENSSL_riscvcap_P[RISCV_capabilities[i].index] |= ++ ((uint32_t)1 << RISCV_capabilities[i].bit_offset); ++ } ++ } ++} ++ ++#ifdef OSSL_RISCV_HWPROBE ++static long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, ++ size_t cpu_count, unsigned long *cpus, ++ unsigned int flags) ++{ ++ return syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_count, cpus, flags); ++} ++ ++static void hwprobe_to_cap(void) ++{ ++ long ret; ++ struct riscv_hwprobe pairs[OSSL_RISCV_HWPROBE_PAIR_COUNT] = { ++ OSSL_RISCV_HWPROBE_PAIR_CONTENT ++ }; ++ ++ ret = riscv_hwprobe(pairs, OSSL_RISCV_HWPROBE_PAIR_COUNT, 0, NULL, 0); ++ /* syscall() returns -1 (errno ENOSYS) if the hwprobe syscall does not exist */ ++ if (ret == 0) { ++ for (size_t i = 0; i < kRISCVNumCaps; ++i) { ++ for (size_t j = 0; j != OSSL_RISCV_HWPROBE_PAIR_COUNT; ++j) { ++ if (pairs[j].key == RISCV_capabilities[i].hwprobe_key ++ && (pairs[j].value & RISCV_capabilities[i].hwprobe_value) ++ != 0) ++ if (!IS_IN_DEPEND_VECTOR(RISCV_capabilities[i].bit_offset) || VECTOR_CAPABLE) ++ /* Match, set relevant bit in OPENSSL_riscvcap_P[] */ ++ OPENSSL_riscvcap_P[RISCV_capabilities[i].index] |= ++ ((uint32_t)1 << RISCV_capabilities[i].bit_offset); ++ } ++ } ++ } ++} ++#endif /* OSSL_RISCV_HWPROBE */ ++ ++size_t riscv_vlen(void) ++{ ++ return vlen; ++} ++ ++void OPENSSL_cpuid_setup(void) ++{ ++ char *e; ++ static int trigger = 0; ++ ++ if (trigger != 0) ++ return; ++ trigger = 1; ++ ++ if ((e = getenv("OPENSSL_riscvcap"))) { 
++ parse_env(e); ++ } ++#ifdef OSSL_RISCV_HWPROBE ++ else { ++ OPENSSL_riscv_hwcap_P = getauxval(AT_HWCAP); ++ hwprobe_to_cap(); ++ } ++#endif ++ ++ if (RISCV_HAS_V()) { ++ vlen = riscv_vlen_asm(); ++ } ++} +diff --git a/include/crypto/riscv_arch.def b/include/crypto/riscv_arch.def +new file mode 100644 +index 0000000..32147d0 +--- /dev/null ++++ b/include/crypto/riscv_arch.def +@@ -0,0 +1,61 @@ ++/* ++ * Copyright 2022-2024 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++/* X Macro Definitions for Specification of RISC-V Arch Capabilities */ ++ ++/* ++ * Each RISC-V capability ends up encoded as a single set bit in an array of ++ * words. When specifying a new capability, write a new RISCV_DEFINE_CAP ++ * statement, with the first argument as the extension name in all-caps, ++ * second argument as the index in the array where the capability will be stored ++ * and third argument as the index of the bit to be used to encode the ++ * capability. ++ * ++ * The fourth and the fifth arguments are copied from Linux header asm/hwprobe.h. ++ * The values are copied directly, rather than referenced by macro name, ++ * because an old version of the header may lack some macro definitions. ++ * When there is no hwprobe key/value pair for a capability, the key is set to -1 ++ * and the value is set to 0, as when the hwprobe syscall returns a key of -1, ++ * the value is set to 0 and the corresponding capability would not be enabled. 
++ * ++ * RISCV_DEFINE_CAP(EXTENSION NAME, array index, bit index, hwprobe key, hwprobe value) */ ++ ++RISCV_DEFINE_CAP(ZBA, 0, 0, 4, (1 << 3)) ++RISCV_DEFINE_CAP(ZBB, 0, 1, 4, (1 << 4)) ++RISCV_DEFINE_CAP(ZBC, 0, 2, 4, (1 << 7)) ++RISCV_DEFINE_CAP(ZBS, 0, 3, 4, (1 << 5)) ++RISCV_DEFINE_CAP(ZBKB, 0, 4, 4, (1 << 8)) ++RISCV_DEFINE_CAP(ZBKC, 0, 5, 4, (1 << 9)) ++RISCV_DEFINE_CAP(ZBKX, 0, 6, 4, (1 << 10)) ++RISCV_DEFINE_CAP(ZKND, 0, 7, 4, (1 << 11)) ++RISCV_DEFINE_CAP(ZKNE, 0, 8, 4, (1 << 12)) ++RISCV_DEFINE_CAP(ZKNH, 0, 9, 4, (1 << 13)) ++RISCV_DEFINE_CAP(ZKSED, 0, 10, 4, (1 << 14)) ++RISCV_DEFINE_CAP(ZKSH, 0, 11, 4, (1 << 15)) ++RISCV_DEFINE_CAP(ZKR, 0, 12, -1, 0) ++RISCV_DEFINE_CAP(ZKT, 0, 13, 4, (1 << 16)) ++RISCV_DEFINE_CAP(V, 0, 14, 4, (1 << 2)) ++RISCV_DEFINE_CAP(ZVBB, 0, 15, 4, (1 << 17)) ++RISCV_DEFINE_CAP(ZVBC, 0, 16, 4, (1 << 18)) ++RISCV_DEFINE_CAP(ZVKB, 0, 17, 4, (1 << 19)) ++RISCV_DEFINE_CAP(ZVKG, 0, 18, 4, (1 << 20)) ++RISCV_DEFINE_CAP(ZVKNED, 0, 19, 4, (1 << 21)) ++RISCV_DEFINE_CAP(ZVKNHA, 0, 20, 4, (1 << 22)) ++RISCV_DEFINE_CAP(ZVKNHB, 0, 21, 4, (1 << 23)) ++RISCV_DEFINE_CAP(ZVKSED, 0, 22, 4, (1 << 24)) ++RISCV_DEFINE_CAP(ZVKSH, 0, 23, 4, (1 << 25)) ++ ++/* ++ * In the future ... ++ * RISCV_DEFINE_CAP(ZFOO, 0, 31, ..., ...) ++ * RISCV_DEFINE_CAP(ZBAR, 1, 0, ..., ...) ++ * ... and so on. ++ */ ++ ++#undef RISCV_DEFINE_CAP +diff --git a/include/crypto/riscv_arch.h b/include/crypto/riscv_arch.h +new file mode 100644 +index 0000000..1d78eb7 +--- /dev/null ++++ b/include/crypto/riscv_arch.h +@@ -0,0 +1,125 @@ ++/* ++ * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. 
You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++#ifndef OSSL_CRYPTO_RISCV_ARCH_H ++# define OSSL_CRYPTO_RISCV_ARCH_H ++ ++# include <ctype.h> ++# include <stdint.h> ++ ++# if defined(OPENSSL_SYS_LINUX) && !defined(FIPS_MODULE) ++# if __has_include(<asm/hwprobe.h>) ++# include <sys/syscall.h> ++# /* ++ * Some environments using musl are reported to have the hwprobe.h include ++ * file but not have the __NR_riscv_hwprobe define. ++ */ ++# ifdef __NR_riscv_hwprobe ++# define OSSL_RISCV_HWPROBE ++# include <asm/hwcap.h> ++extern unsigned int OPENSSL_riscv_hwcap_P; ++# define VECTOR_CAPABLE (OPENSSL_riscv_hwcap_P & COMPAT_HWCAP_ISA_V) ++# define ZVX_MIN 15 ++# define ZVX_MAX 23 ++# define IS_IN_DEPEND_VECTOR(offset) ((ZVX_MIN <= (offset)) && ((offset) <= ZVX_MAX)) /* true only for bit offsets ZVX_MIN..ZVX_MAX */ ++# endif ++# endif ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++extern uint32_t OPENSSL_riscvcap_P[ (( ++# include "riscv_arch.def" ++) + sizeof(uint32_t) - 1) / sizeof(uint32_t) ]; ++ ++# ifdef OPENSSL_RISCVCAP_IMPL ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++uint32_t OPENSSL_riscvcap_P[ (( ++# include "riscv_arch.def" ++) + sizeof(uint32_t) - 1) / sizeof(uint32_t) ]; ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ static inline int RISCV_HAS_##NAME(void) \ ++ { \ ++ return (OPENSSL_riscvcap_P[INDEX] & ((uint32_t)1 << (BIT_INDEX))) != 0; \ ++ } ++# include "riscv_arch.def" ++ ++struct RISCV_capability_s { ++ const char *name; ++ size_t index; ++ size_t bit_offset; ++# ifdef OSSL_RISCV_HWPROBE ++ int32_t hwprobe_key; ++ uint64_t hwprobe_value; ++# endif ++}; ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ OSSL_RISCV_HWPROBE_KEY, OSSL_RISCV_HWPROBE_VALUE) +1 ++extern const struct RISCV_capability_s RISCV_capabilities[ ++# include "riscv_arch.def" ++]; ++ ++# ifdef OPENSSL_RISCVCAP_IMPL ++# ifdef OSSL_RISCV_HWPROBE ++# define 
RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ { #NAME, INDEX, BIT_INDEX, HWPROBE_KEY, HWPROBE_VALUE }, ++# else ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ { #NAME, INDEX, BIT_INDEX }, ++# endif ++const struct RISCV_capability_s RISCV_capabilities[] = { ++# include "riscv_arch.def" ++}; ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++static const size_t kRISCVNumCaps = ++# include "riscv_arch.def" ++; ++ ++# ifdef OSSL_RISCV_HWPROBE ++/* ++ * Content is an array of { hwprobe_key, 0 } where ++ * hwprobe_key is copied from asm/hwprobe.h. ++ * It should be updated along with riscv_arch.def. ++ */ ++# define OSSL_RISCV_HWPROBE_PAIR_COUNT 1 ++# define OSSL_RISCV_HWPROBE_PAIR_CONTENT \ ++ { 4, 0 }, ++# endif ++ ++/* Extension combination tests. */ ++#define RISCV_HAS_ZBB_AND_ZBC() (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) ++#define RISCV_HAS_ZBKB_AND_ZKND_AND_ZKNE() (RISCV_HAS_ZBKB() && RISCV_HAS_ZKND() && RISCV_HAS_ZKNE()) ++#define RISCV_HAS_ZKND_AND_ZKNE() (RISCV_HAS_ZKND() && RISCV_HAS_ZKNE()) ++/* ++ * The ZVBB is the superset of ZVKB extension. We use macro here to replace the ++ * `RISCV_HAS_ZVKB()` with `RISCV_HAS_ZVBB() || RISCV_HAS_ZVKB()`. ++ */ ++#define RISCV_HAS_ZVKB() (RISCV_HAS_ZVBB() || RISCV_HAS_ZVKB()) ++#define RISCV_HAS_ZVKB_AND_ZVKNHA() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKNHA()) ++#define RISCV_HAS_ZVKB_AND_ZVKNHB() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKNHB()) ++#define RISCV_HAS_ZVKB_AND_ZVKSED() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKSED()) ++#define RISCV_HAS_ZVKB_AND_ZVKSH() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKSH()) ++ ++/* ++ * Get the size of a vector register in bits (VLEN). ++ * If RISCV_HAS_V() is false, then this returns 0. 
++ */ ++size_t riscv_vlen(void); ++ ++#endif +-- +2.27.0 + diff --git a/openssl.spec b/openssl.spec index df851e7..b395dc6 100644 --- a/openssl.spec +++ b/openssl.spec @@ -2,7 +2,7 @@ Name: openssl Epoch: 1 Version: 3.0.12 -Release: 17 +Release: 20 Summary: Cryptography and SSL/TLS Toolkit License: OpenSSL and SSLeay URL: https://www.openssl.org/ @@ -77,6 +77,10 @@ Patch64: backport-Release-the-drbg-in-the-global-default-context-befor.patch Patch65: backport-params-provide-a-faster-TRIE-based-param-lookup.patch Patch66: backport-CVE-2024-13176-Fix-timing-side-channel.patch + + +Patch70: backport-add-riscv64-assembly-support-and-features.patch + Patch9000: add-FIPS_mode_set-support.patch Patch9001: backport-CVE-2024-9143-Harden-BN_GF2m_poly2arr-against-misuse.patch Patch9002: Fix-build-error-for-ppc64le.patch @@ -281,6 +285,9 @@ make test || : %ldconfig_scriptlets libs %changelog +* Fri Jul 25 2025 liuqingtao - 1:3.0.12-20 +- backport add riscv64 assembly support and features + * Mon Mar 10 2025 mahailiang - 1:3.0.12-17 - add sw_64 support -- Gitee