From b3a1ef56346de016deb2d2bc2d4c18a063891466 Mon Sep 17 00:00:00 2001
From: Kuai Wei
Date: Mon, 26 Feb 2024 11:43:19 +0800
Subject: [PATCH] [Backport] 8276799: Implementation of JEP 422: Linux/RISC-V Port

Summary: sync with riscv upstream riscv-port-jdk11u

This is a combination of multiple patches.

Revert "[Misc] Add failed test cases to linux-riscv problem list"
Revert "[Misc] RISC-V backend build broken after merging VectorAPI"
Revert "[Backport] 8297476: Increase InlineSmallCode default from 1000 to 2500 for RISC-V"
Revert "[RISCV] Backport ShenandoahGC specific code"
Revert "[Backport] 8296771: RISC-V: C2: assert(false) failed: bad AD file"
Revert "[Misc] Fix ported TestLibmIntrinsics.java"
Revert "[Backport] 8295926: RISC-V: C1: Fix LIRGenerator::do_LibmIntrinsic"
Revert "[Backport] 8293100: RISC-V: Need to save and restore callee-saved FloatRegisters in StubGenerator::generate_call_stub"
Revert "[Backport] 8278743: riscv: Remove the x4 register saving logic in Java frames"
Revert "[Backport] 8287418: riscv: Fix correctness issue of MacroAssembler::movptr"
Revert "[RISCV] Backport RVV and support RVV-0.7.1"
Revert "[RISCV] support paired memory instruction in CSky (#217)"
Revert "[Backport] Backport RISC-V backend code from the openjdk/riscv-port repo's initial load"

8276799: Implementation of JEP 422: Linux/RISC-V Port
8283737: riscv: MacroAssembler::stop() should emit fixed-length instruction sequence
8285437: riscv: Fix MachNode size mismatch for MacroAssembler::verify_oops*
8287418: riscv: Fix correctness issue of MacroAssembler::movptr
8293100: RISC-V: Need to save and restore callee-saved FloatRegisters in StubGenerator::generate_call_stub
8295926: RISC-V: C1: Fix LIRGenerator::do_LibmIntrinsic
8291952: riscv: Remove PRAGMA_NONNULL_IGNORED
8308277: RISC-V: Improve vectorization of Math.sqrt() on floats
8282306: os::is_first_C_frame(frame*) crashes on invalid link access
[RISCV] support paired memory instruction in CSky (#217)
[RISCV] Backport RVV and support RVV-0.7.1
[Misc] Fix build/test failure after riscv port

Testing: jtreg tier1~3
Reviewers: sendaoYan, yulei
Issue: https://aone.alibaba-inc.com/v2/project/355606/req/55189627
CR: https://github.com/dragonwell-project/dragonwell11/pull/800
---
 make/autoconf/build-aux/autoconf-config.guess | 3 -
 make/autoconf/build-aux/autoconf-config.sub | 2 -
 make/autoconf/hotspot.m4 | 2 +-
 make/autoconf/libraries.m4 | 7 +-
 make/autoconf/platform.m4 | 2 +-
 make/hotspot/gensrc/GensrcAdlc.gmk | 8 +-
 .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 2 +-
 src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 2 +-
 src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 4 +-
 .../cpu/riscv/abstractInterpreter_riscv.cpp | 13 +-
 src/hotspot/cpu/riscv/assembler_riscv.cpp | 25 +-
 src/hotspot/cpu/riscv/assembler_riscv.hpp | 1398 ++++++-
 src/hotspot/cpu/riscv/bytes_riscv.hpp | 4 +-
 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 106 +-
 src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 5 +-
 .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 5 +-
 .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 5 +-
 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 17 +-
 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 5 +-
 .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 31 +-
 .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 4 +-
 .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 9 +-
 .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 3 +-
 .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 156 +-
 .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 11 +-
 .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 76 +-
 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 4 +-
src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 6 +- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 44 +- .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 10 +- src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 79 +- src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 7 +- src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 9 +- src/hotspot/cpu/riscv/c2_init_riscv.cpp | 4 +- src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +- src/hotspot/cpu/riscv/copy_riscv.hpp | 5 +- src/hotspot/cpu/riscv/depChecker_riscv.hpp | 10 +- src/hotspot/cpu/riscv/disassembler_riscv.hpp | 33 +- src/hotspot/cpu/riscv/frame_riscv.cpp | 75 +- src/hotspot/cpu/riscv/frame_riscv.hpp | 36 +- src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 30 +- .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 26 +- .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 6 +- .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 1 + .../gc/shared/barrierSetAssembler_riscv.cpp | 35 +- .../gc/shared/barrierSetAssembler_riscv.hpp | 7 +- .../cardTableBarrierSetAssembler_riscv.cpp | 20 +- .../cardTableBarrierSetAssembler_riscv.hpp | 7 +- .../modRefBarrierSetAssembler_riscv.hpp | 6 +- .../c1/shenandoahBarrierSetC1_riscv.cpp | 11 +- .../shenandoahBarrierSetAssembler_riscv.cpp | 94 +- .../shenandoahBarrierSetAssembler_riscv.hpp | 55 +- .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 13 +- .../cpu/riscv/globalDefinitions_riscv.hpp | 16 +- src/hotspot/cpu/riscv/globals_riscv.hpp | 60 +- src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 6 +- src/hotspot/cpu/riscv/icache_riscv.cpp | 4 +- src/hotspot/cpu/riscv/icache_riscv.hpp | 4 +- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 103 +- src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 4 +- src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 228 +- src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 8 +- .../cpu/riscv/javaFrameAnchor_riscv.hpp | 12 +- .../cpu/riscv/jniFastGetField_riscv.cpp | 33 +- src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 10 +- .../cpu/riscv/macroAssembler_riscv.cpp | 3583 ++++++++++------- .../cpu/riscv/macroAssembler_riscv.hpp | 474 +-- .../cpu/riscv/macroAssembler_riscv.inline.hpp | 5 +- src/hotspot/cpu/riscv/matcher_riscv.hpp | 144 - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 13 +- src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 1 - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 42 +- src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 199 +- src/hotspot/cpu/riscv/registerMap_riscv.hpp | 2 - .../cpu/riscv/register_definitions_riscv.cpp | 6 +- src/hotspot/cpu/riscv/register_riscv.cpp | 17 +- src/hotspot/cpu/riscv/register_riscv.hpp | 96 +- src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 5 +- src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 1 - src/hotspot/cpu/riscv/riscv.ad | 894 ++-- src/hotspot/cpu/riscv/riscv_b.ad | 451 +++ src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 616 +-- src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1125 +++++- src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 44 +- src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 49 +- .../templateInterpreterGenerator_riscv.cpp | 134 +- src/hotspot/cpu/riscv/templateTable_riscv.cpp | 229 +- src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 1 - .../cpu/riscv/vm_version_ext_riscv.cpp | 12 +- .../cpu/riscv/vm_version_ext_riscv.hpp | 4 +- src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 +- src/hotspot/cpu/riscv/vm_version_riscv.hpp | 39 +- src/hotspot/cpu/riscv/vmreg_riscv.cpp | 7 +- src/hotspot/cpu/riscv/vmreg_riscv.hpp | 24 +- src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 11 +- src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 4 +- src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 4 +- 
src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 2 +- src/hotspot/os/linux/os_linux.cpp | 2 - src/hotspot/os/posix/os_posix.cpp | 6 +- .../linux_riscv/assembler_linux_riscv.cpp | 2 - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 5 +- .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 1 - ..._riscv.hpp => copy_linux_riscv.inline.hpp} | 38 +- .../linux_riscv/globals_linux_riscv.hpp | 5 +- .../linux_riscv/orderAccess_linux_riscv.hpp | 1 + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 171 +- .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 10 +- .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 5 +- .../linux_riscv/vm_version_linux_riscv.cpp | 118 + src/hotspot/share/c1/c1_LIR.cpp | 11 +- src/hotspot/share/c1/c1_LIR.hpp | 31 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 19 +- src/hotspot/share/c1/c1_LinearScan.cpp | 2 +- src/hotspot/share/c1/c1_Runtime1.cpp | 2 +- .../gc/shenandoah/shenandoahArguments.cpp | 4 +- src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 2 +- src/hotspot/share/opto/matcher.hpp | 6 +- src/hotspot/share/opto/regmask.hpp | 2 +- src/hotspot/share/runtime/arguments.cpp | 2 +- .../flags/jvmFlagConstraintsCompiler.cpp | 2 +- src/hotspot/share/runtime/os.cpp | 7 +- src/hotspot/share/runtime/synchronizer.cpp | 2 +- src/hotspot/share/runtime/thread.inline.hpp | 4 +- .../share/runtime/tieredThresholdPolicy.cpp | 2 +- .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- .../linux/native/libsaproc/libproc.h | 2 +- .../linux/native/libsaproc/ps_proc.c | 2 +- .../classes/sun/jvm/hotspot/HotSpotAgent.java | 6 +- ...64.java => MachineDescriptionRISCV64.java} | 3 +- .../debugger/linux/LinuxCDebugger.java | 13 +- .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + .../riscv64/LinuxRISCV64ThreadContext.java | 48 + .../proc/riscv64/ProcRISCV64Thread.java | 88 + .../riscv64/ProcRISCV64ThreadContext.java} | 41 +- .../riscv64/ProcRISCV64ThreadFactory.java} | 35 +- .../remote/riscv64/RemoteRISCV64Thread.java | 55 + .../riscv64/RemoteRISCV64ThreadContext.java | 48 + .../riscv64/RemoteRISCV64ThreadFactory.java} | 35 +- .../debugger/risv64/RISCV64ThreadContext.java | 172 + .../sun/jvm/hotspot/runtime/Threads.java | 5 +- .../LinuxRISCV64JavaThreadPDAccess.java | 132 + .../riscv64/RISCV64CurrentFrameGuess.java | 223 + .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 +++ .../riscv64/RISCV64JavaCallWrapper.java | 59 + .../runtime/riscv64/RISCV64RegisterMap.java | 53 + .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- src/utils/hsdis/hsdis.c | 4 +- test/hotspot/jtreg/ProblemList.txt | 110 - test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- .../floatingpoint/TestLibmIntrinsics.java | 1 - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 6 +- ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 6 +- ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 6 +- .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 6 +- .../testcases/GenericTestCaseForOtherCPU.java | 10 +- ...ericTestCaseForUnsupportedRISCV64CPU.java} | 85 +- .../loopopts/superword/ProdRed_Double.java | 4 +- .../loopopts/superword/ProdRed_Float.java | 4 +- .../loopopts/superword/ProdRed_Int.java | 4 +- .../loopopts/superword/ReductionPerf.java | 4 +- .../superword/SumRedAbsNeg_Double.java | 4 +- .../superword/SumRedAbsNeg_Float.java | 4 +- .../loopopts/superword/SumRedSqrt_Double.java | 4 +- .../loopopts/superword/SumRed_Double.java | 4 +- .../loopopts/superword/SumRed_Float.java | 4 +- .../loopopts/superword/SumRed_Int.java | 4 +- .../argumentcorruption/CheckLongArgs.java | 2 +- .../criticalnatives/lookup/LookUp.java | 2 +- .../sha/predicate/IntrinsicPredicates.java | 2 +- 
.../NMT/CheckForProperDetailStackTrace.java | 4 +- .../ReservedStack/ReservedStackTest.java | 6 +- .../cds/CdsDifferentCompactObjectHeaders.java | 1 + test/hotspot/jtreg/test_env.sh | 5 - ...stMutuallyExclusivePlatformPredicates.java | 2 +- .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- test/jdk/ProblemList.txt | 35 - .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- test/langtools/ProblemList.txt | 3 - test/lib/jdk/test/lib/Platform.java | 2 +- 180 files changed, 8993 insertions(+), 5000 deletions(-) delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad rename src/hotspot/os_cpu/linux_riscv/{copy_linux_riscv.hpp => copy_linux_riscv.inline.hpp} (85%) create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp rename src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/{MachineDescriptionRiscv64.java => MachineDescriptionRISCV64.java} (90%) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java rename src/{hotspot/cpu/riscv/registerMap_riscv.cpp => jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java} (56%) rename src/{hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp => jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java} (56%) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java rename src/{hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp => jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java} (55%) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java rename test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/{GenericTestCaseForUnsupportedRiscv64CPU.java => GenericTestCaseForUnsupportedRISCV64CPU.java} (52%) diff --git a/make/autoconf/build-aux/autoconf-config.guess b/make/autoconf/build-aux/autoconf-config.guess index 1091acc872f..15ee4389269 100644 --- a/make/autoconf/build-aux/autoconf-config.guess +++ b/make/autoconf/build-aux/autoconf-config.guess @@ -1000,9 +1000,6 @@ EOF ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; - riscv64:Linux:*:*) - echo riscv64-unknown-linux-gnu - exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; diff --git a/make/autoconf/build-aux/autoconf-config.sub b/make/autoconf/build-aux/autoconf-config.sub index b78cc3a3b3b..1aab2b303e3 100644 --- 
a/make/autoconf/build-aux/autoconf-config.sub +++ b/make/autoconf/build-aux/autoconf-config.sub @@ -302,7 +302,6 @@ case $basic_machine in | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | pyramid \ - | riscv64 \ | score \ | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ @@ -384,7 +383,6 @@ case $basic_machine in | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | pyramid-* \ - | riscv64-* \ | romp-* | rs6000-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 index 3a1d1efd19a..54e05363df7 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -371,7 +371,7 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ - test "x$OPENJDK_TARGET_CPU_ARCH" = "xriscv"; then + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 index 4defcdb270b..5c49fd9285d 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -130,10 +130,9 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" fi - # Programs which use C11 or C++11 atomics, like #include , - # generally must link against -latomic on RISC-V + # Because RISC-V only has word-sized atomics, it requries libatomic where + # other common architectures do not. So link libatomic by default. if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then - BASIC_JDKLIB_LIBS="$BASIC_JDKLIB_LIBS -latomic" BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" fi diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index fa3f6b6f126..bb4d516a377 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index 2d9f33eb754..6b6ca5b1b8e 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif + ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 61e3048a944..3c1003c1b05 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index a781fea7668..bbf96086fd4 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 95fbbe8c424..6c97e9d31fa 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2019, SAP SE. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp index 9da28e37bef..31c63abe71d 100644 --- a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,6 @@ #include "utilities/debug.hpp" #include "utilities/macros.hpp" - int AbstractInterpreter::BasicType_as_index(BasicType type) { int i = 0; switch (type) { @@ -102,7 +101,7 @@ int AbstractInterpreter::size_activation(int max_stack, // frame do we need to allow max_stack words. (is_top_frame ? 
max_stack : temps + extra_args); - // On riscv64 we always keep the stack pointer 16-aligned, so we + // On riscv we always keep the stack pointer 16-aligned, so we // must round up here. size = align_up(size, 2); @@ -134,10 +133,9 @@ void AbstractInterpreter::layout_activation(Method* method, #endif interpreter_frame->interpreter_frame_set_method(method); - // NOTE the difference in using sender_sp and - // interpreter_frame_sender_sp interpreter_frame_sender_sp is - // the original sp of the caller (the unextended_sp) and - // sender_sp is fp+8/16 (32bit/64bit) + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp intptr_t* locals = NULL; if (caller->is_interpreted_frame()) { locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -171,6 +169,7 @@ void AbstractInterpreter::layout_activation(Method* method, interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); *interpreter_frame->interpreter_frame_mirror_addr() = diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp index 323df0af865..a5f688cda1f 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -21,6 +21,7 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. + * */ #include @@ -34,7 +35,6 @@ #include "memory/resourceArea.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sharedRuntime.hpp" -#include "nativeInst_riscv.hpp" int AbstractAssembler::code_fill_byte() { return 0; @@ -80,11 +80,16 @@ void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) } } -void Assembler::li(Register Rd, int64_t imm) { +void Assembler::zext_w(Register Rd, Register Rs) { + add_uw(Rd, Rs, zr); +} + +void Assembler::_li(Register Rd, int64_t imm) { // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff int shift = 12; int64_t upper = imm, lower = imm; - // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. + // Split imm to a lower 12-bit sign-extended part and the remainder, + // because addi will sign-extend the lower imm. lower = ((int32_t)imm << 20) >> 20; upper -= lower; @@ -98,8 +103,7 @@ void Assembler::li(Register Rd, int64_t imm) { if (lower != 0) { addi(Rd, Rd, lower); } - } - else { + } else { // 32-bit integer Register hi_Rd = zr; if (upper != 0) { @@ -113,8 +117,8 @@ void Assembler::li(Register Rd, int64_t imm) { } void Assembler::li64(Register Rd, int64_t imm) { - // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), - // upper = imm[63:32] + 1. + // Load upper 32 bits. 
upper = imm[63:32], but if imm[31] == 1 or + // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. int64_t lower = imm & 0xffffffff; lower -= ((lower << 44) >> 44); int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; @@ -209,13 +213,13 @@ void Assembler::ret() { #define INSN(NAME, REGISTER) \ void Assembler::NAME(const Address &adr, Register temp) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ int32_t offset = 0; \ baseOffset(temp, adr, offset); \ jalr(REGISTER, temp, offset); \ @@ -366,4 +370,3 @@ Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _of ShouldNotReachHere(); } } - diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 928ece613c6..dc01c6112d0 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,8 +29,9 @@ #include "asm/register.hpp" #include "assembler_riscv.inline.hpp" +#include "metaprogramming/enableIf.hpp" -#define registerSize 64 +#define XLEN 64 // definitions of various symbolic names for machine registers @@ -40,10 +41,10 @@ class Argument { public: enum { - n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) - n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) + n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) - n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) + n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) }; }; @@ -67,7 +68,21 @@ REGISTER_DECLARATION(FloatRegister, c_farg5, f15); REGISTER_DECLARATION(FloatRegister, c_farg6, f16); REGISTER_DECLARATION(FloatRegister, c_farg7, f17); -// java function register(caller-save registers) +// Symbolically name the register arguments used by the Java calling convention. +// We have control over the convention for java so we can do what we please. +// What pleases us is to offset the java calling convention so that when +// we call a suitable jni method the arguments are lined up and we don't +// have to do much shuffling. A suitable jni method is non-static and a +// small number of arguments. 
+// +// |------------------------------------------------------------------------| +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | +// |------------------------------------------------------------------------| +// | x10 x11 x12 x13 x14 x15 x16 x17 | +// |------------------------------------------------------------------------| +// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | +// |------------------------------------------------------------------------| + REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -77,6 +92,8 @@ REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); +// Java floating args are passed as per C + REGISTER_DECLARATION(FloatRegister, j_farg0, f10); REGISTER_DECLARATION(FloatRegister, j_farg1, f11); REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -93,15 +110,15 @@ REGISTER_DECLARATION(Register, gp, x3); // thread pointer REGISTER_DECLARATION(Register, tp, x4); +// registers used to hold VM data either temporarily within a method +// or across method calls + // volatile (caller-save) registers // current method -- must be in a call-clobbered register REGISTER_DECLARATION(Register, xmethod, x31); // return address REGISTER_DECLARATION(Register, ra, x1); -// link rigster -REGISTER_DECLARATION(Register, lr, x1); - // non-volatile (callee-save) registers @@ -118,9 +135,6 @@ REGISTER_DECLARATION(Register, xmonitors, x25); // locals on stack REGISTER_DECLARATION(Register, xlocals, x24); -/* If you use x4(tp) as java thread pointer according to the instruction manual, - * it overlaps with the register used by c++ thread. 
- */ // java thread pointer REGISTER_DECLARATION(Register, xthread, x23); // bytecode pointer @@ -130,13 +144,13 @@ REGISTER_DECLARATION(Register, xdispatch, x21); // Java stack pointer REGISTER_DECLARATION(Register, esp, x20); -// tempory register(caller-save registers) +// temporary register(caller-save registers) REGISTER_DECLARATION(Register, t0, x5); REGISTER_DECLARATION(Register, t1, x6); REGISTER_DECLARATION(Register, t2, x7); const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { - c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 }; const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -168,22 +182,22 @@ class Address { Address() : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } Address(Register r) - : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } Address(Register r, int o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned int o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } Address(Register r, unsigned long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } #ifdef ASSERT Address(Register r, ByteSize disp) - : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } + : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } #endif Address(address target, RelocationHolder const& rspec) : _base(noreg), @@ -208,7 +222,7 @@ class Address { return _mode; } - bool uses(Register reg) const { return _base == reg;} + bool uses(Register reg) const { return _base == reg; } const address target() const { return _target; } const RelocationHolder& rspec() const { return _rspec; } ~Address() { @@ -258,21 +272,11 @@ class InternalAddress: public Address { ~InternalAddress() {} }; -const int FPUStateSizeInWords = 32 * 2; - class Assembler : public AbstractAssembler { public: enum { instruction_size = 4 }; - //---< calculate length of instruction >--- - // We just use the values set above. 
- // instruction must start at passed address - static unsigned int instr_len(unsigned char *instr) { return instruction_size; } - - //---< longest instructions >--- - static unsigned int instr_maxlen() { return instruction_size; } - enum RoundingMode { rne = 0b000, // round to Nearest, ties to Even rtz = 0b001, // round towards Zero @@ -282,33 +286,33 @@ class Assembler : public AbstractAssembler { rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. }; - void baseOffset32(Register temp, const Address &adr, int32_t &offset) { - assert(temp != noreg, "temp must not be empty register!"); - guarantee(adr.base() != temp, "should use different registers!"); + void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { + assert(Rd != noreg, "Rd must not be empty register!"); + guarantee(Rd != adr.base(), "should use different registers!"); if (is_offset_in_range(adr.offset(), 32)) { int32_t imm = adr.offset(); int32_t upper = imm, lower = imm; lower = (imm << 20) >> 20; upper -= lower; - lui(temp, upper); + lui(Rd, upper); offset = lower; } else { - movptr_with_offset(temp, (address)(uintptr_t)adr.offset(), offset); + movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); } - add(temp, temp, adr.base()); + add(Rd, Rd, adr.base()); } - void baseOffset(Register temp, const Address &adr, int32_t &offset) { + void baseOffset(Register Rd, const Address &adr, int32_t &offset) { if (is_offset_in_range(adr.offset(), 12)) { - assert(temp != noreg, "temp must not be empty register!"); - addi(temp, adr.base(), adr.offset()); + assert(Rd != noreg, "Rd must not be empty register!"); + addi(Rd, adr.base(), adr.offset()); offset = 0; } else { - baseOffset32(temp, adr, offset); + baseOffset32(Rd, adr, offset); } } - void li(Register Rd, int64_t imm); // optimized load immediate + void _li(Register Rd, int64_t imm); // optimized load immediate void li32(Register Rd, int32_t imm); void li64(Register Rd, int64_t imm); void movptr(Register Rd, address addr); @@ -316,7 +320,7 @@ class Assembler : public AbstractAssembler { void movptr(Register Rd, uintptr_t imm64); void ifence(); void j(const address &dest, Register temp = t0); - void j(const Address &adr, Register temp = t0) ; + void j(const Address &adr, Register temp = t0); void j(Label &l, Register temp = t0); void jal(Label &l, Register temp = t0); void jal(const address &dest, Register temp = t0); @@ -385,7 +389,7 @@ class Assembler : public AbstractAssembler { emit_int32((jint)insn); } - void halt() { + void _halt() { emit_int32(0); } @@ -402,18 +406,18 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(add, 0b0110011, 0b000, 0b0000000); - INSN(sub, 0b0110011, 0b000, 0b0100000); - INSN(andr, 0b0110011, 0b111, 0b0000000); - INSN(orr, 0b0110011, 0b110, 0b0000000); - INSN(xorr, 0b0110011, 0b100, 0b0000000); + INSN(_add, 0b0110011, 0b000, 0b0000000); + INSN(_sub, 0b0110011, 0b000, 0b0100000); + INSN(_andr, 0b0110011, 0b111, 0b0000000); + INSN(_orr, 0b0110011, 0b110, 0b0000000); + INSN(_xorr, 0b0110011, 0b100, 0b0000000); INSN(sll, 0b0110011, 0b001, 0b0000000); INSN(sra, 0b0110011, 0b101, 0b0100000); INSN(srl, 0b0110011, 0b101, 0b0000000); INSN(slt, 0b0110011, 0b010, 0b0000000); INSN(sltu, 0b0110011, 0b011, 0b0000000); - INSN(addw, 0b0111011, 0b000, 0b0000000); - INSN(subw, 0b0111011, 0b000, 0b0100000); + INSN(_addw, 0b0111011, 0b000, 0b0000000); + INSN(_subw, 0b0111011, 0b000, 0b0100000); INSN(sllw, 0b0111011, 0b001, 0b0000000); INSN(sraw, 0b0111011, 0b101, 
0b0100000); INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -431,9 +435,6 @@ class Assembler : public AbstractAssembler { INSN(remw, 0b0111011, 0b110, 0b0000001); INSN(remuw, 0b0111011, 0b111, 0b0000001); - // Vector Configuration Instruction - INSN(vsetvl, 0b1010111, 0b111, 0b1000000); - #undef INSN #define INSN_ENTRY_RELOC(result_type, header) \ @@ -443,11 +444,11 @@ class Assembler : public AbstractAssembler { "only internal_word_type relocs make sense here"); \ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); -// Load/store register (all modes) + // Load/store register (all modes) #define INSN(NAME, op, funct3) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ int32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -455,7 +456,19 @@ class Assembler : public AbstractAssembler { patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 31, 20, val); \ emit(insn); \ - } \ + } + + INSN(lb, 0b0000011, 0b000); + INSN(lbu, 0b0000011, 0b100); + INSN(lh, 0b0000011, 0b001); + INSN(lhu, 0b0000011, 0b101); + INSN(_lw, 0b0000011, 0b010); + INSN(lwu, 0b0000011, 0b110); + INSN(_ld, 0b0000011, 0b011); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rd, address dest) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -472,13 +485,13 @@ class Assembler : public AbstractAssembler { NAME(Rd, dest); \ } \ void NAME(Register Rd, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rd, adr.target()); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rd, adr.base(), adr.offset()); \ } else { \ @@ -501,20 +514,20 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, &Assembler::NAME); \ } - INSN(lb, 0b0000011, 0b000); - INSN(lbu, 0b0000011, 0b100); - INSN(ld, 0b0000011, 0b011); - INSN(lh, 0b0000011, 0b001); - INSN(lhu, 0b0000011, 0b101); - INSN(lw, 0b0000011, 0b010); - INSN(lwu, 0b0000011, 0b110); + INSN(lb); + INSN(lbu); + INSN(lh); + INSN(lhu); + INSN(lw); + INSN(lwu); + INSN(ld); #undef INSN #define INSN(NAME, op, funct3) \ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -522,7 +535,14 @@ class Assembler : public AbstractAssembler { patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 31, 20, val); \ emit(insn); \ - } \ + } + + INSN(flw, 0b0000111, 0b010); + INSN(_fld, 0b0000111, 0b011); + +#undef INSN + +#define INSN(NAME) \ void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -539,13 +559,13 @@ class Assembler : public AbstractAssembler { NAME(Rd, dest, temp); \ } \ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rd, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ 
NAME(Rd, adr.base(), adr.offset()); \ } else { \ @@ -560,14 +580,14 @@ class Assembler : public AbstractAssembler { } \ } - INSN(flw, 0b0000111, 0b010); - INSN(fld, 0b0000111, 0b011); + INSN(flw); + INSN(fld); #undef INSN #define INSN(NAME, op, funct3) \ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0x1fff; \ uint32_t val11 = (val >> 11) & 0x1; \ uint32_t val12 = (val >> 12) & 0x1; \ @@ -582,7 +602,18 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 30, 25, high); \ patch((address)&insn, 31, val12); \ emit(insn); \ - } \ + } + + INSN(_beq, 0b1100011, 0b000); + INSN(_bne, 0b1100011, 0b001); + INSN(bge, 0b1100011, 0b101); + INSN(bgeu, 0b1100011, 0b111); + INSN(blt, 0b1100011, 0b100); + INSN(bltu, 0b1100011, 0b110); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rs1, Register Rs2, const address dest) { \ assert_cond(dest != NULL); \ int64_t offset = (dest - pc()); \ @@ -593,12 +624,12 @@ class Assembler : public AbstractAssembler { NAME(Rs1, Rs2, dest); \ } - INSN(beq, 0b1100011, 0b000); - INSN(bge, 0b1100011, 0b101); - INSN(bgeu, 0b1100011, 0b111); - INSN(blt, 0b1100011, 0b100); - INSN(bltu, 0b1100011, 0b110); - INSN(bne, 0b1100011, 0b001); + INSN(beq); + INSN(bne); + INSN(bge); + INSN(bgeu); + INSN(blt); + INSN(bltu); #undef INSN @@ -618,8 +649,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, REGISTER, op, funct3) \ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ uint32_t low = val & 0x1f; \ uint32_t high = (val >> 5) & 0x7f; \ @@ -631,16 +662,27 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 31, 25, high); \ emit(insn); \ } \ + + INSN(sb, Register, 0b0100011, 0b000); + INSN(sh, Register, 0b0100011, 0b001); + INSN(_sw, Register, 0b0100011, 0b010); + INSN(_sd, Register, 0b0100011, 0b011); + INSN(fsw, FloatRegister, 0b0100111, 0b010); + INSN(_fsd, FloatRegister, 0b0100111, 0b011); + +#undef INSN + +#define INSN(NAME, REGISTER) \ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ NAME(Rs, dest, temp); \ } - INSN(sb, Register, 0b0100011, 0b000); - INSN(sh, Register, 0b0100011, 0b001); - INSN(sw, Register, 0b0100011, 0b010); - INSN(sd, Register, 0b0100011, 0b011); - INSN(fsw, FloatRegister, 0b0100111, 0b010); - INSN(fsd, FloatRegister, 0b0100111, 0b011); + INSN(sb, Register); + INSN(sh, Register); + INSN(sw, Register); + INSN(sd, Register); + INSN(fsw, FloatRegister); + INSN(fsd, FloatRegister); #undef INSN @@ -659,14 +701,14 @@ class Assembler : public AbstractAssembler { } \ } \ void NAME(Register Rs, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ assert_different_registers(Rs, temp); \ code_section()->relocate(pc(), adr.rspec()); \ NAME(Rs, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rs, adr.base(), adr.offset()); \ } else { \ @@ -703,13 +745,13 @@ class Assembler : public AbstractAssembler { } \ } \ void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ - switch(adr.getMode()) { \ + switch (adr.getMode()) { \ case Address::literal: { \ 
code_section()->relocate(pc(), adr.rspec()); \ NAME(Rs, adr.target(), temp); \ break; \ } \ - case Address::base_plus_offset:{ \ + case Address::base_plus_offset: { \ if (is_offset_in_range(adr.offset(), 12)) { \ NAME(Rs, adr.base(), adr.offset()); \ } else { \ @@ -769,8 +811,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, op) \ void NAME(Register Rd, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -778,7 +820,13 @@ class Assembler : public AbstractAssembler { patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ emit(insn); \ - } \ + } + + INSN(_jal, 0b1101111); + +#undef INSN + +#define INSN(NAME) \ void NAME(Register Rd, const address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t offset = dest - pc(); \ @@ -796,7 +844,7 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, temp, &Assembler::NAME); \ } - INSN(jal, 0b1101111); + INSN(jal); #undef INSN @@ -804,8 +852,8 @@ class Assembler : public AbstractAssembler { #define INSN(NAME, op, funct) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 14, 12, funct); \ @@ -815,7 +863,7 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(jalr, 0b1100111, 0b000); + INSN(_jalr, 0b1100111, 0b000); #undef INSN @@ -851,7 +899,8 @@ class Assembler : public AbstractAssembler { INSN(fence_i, 0b0001111, 0b001, 0b000000000000); INSN(ecall, 0b1110011, 0b000, 0b000000000000); - INSN(ebreak, 0b1110011, 0b000, 0b000000000001); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + #undef INSN enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -959,12 +1008,12 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(addi, 0b0010011, 0b000); - INSN(slti, 0b0010011, 0b010); - INSN(addiw, 0b0011011, 0b000); - INSN(and_imm12, 0b0010011, 0b111); - INSN(ori, 0b0010011, 0b110); - INSN(xori, 0b0010011, 0b100); + INSN(_addi, 0b0010011, 0b000); + INSN(slti, 0b0010011, 0b010); + INSN(_addiw, 0b0011011, 0b000); + INSN(_and_imm12, 0b0010011, 0b111); + INSN(ori, 0b0010011, 0b110); + INSN(xori, 0b0010011, 0b100); #undef INSN @@ -998,9 +1047,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(slli, 0b0010011, 0b001, 0b000000); - INSN(srai, 0b0010011, 0b101, 0b010000); - INSN(srli, 0b0010011, 0b101, 0b000000); + INSN(_slli, 0b0010011, 0b001, 0b000000); + INSN(_srai, 0b0010011, 0b101, 0b010000); + INSN(_srli, 0b0010011, 0b101, 0b000000); #undef INSN @@ -1036,7 +1085,7 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(lui, 0b0110111); + INSN(_lui, 0b0110111); INSN(auipc, 0b0010111); #undef INSN @@ -1223,6 +1272,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN +// ========================== +// RISC-V Vector Extension +// ========================== enum SEW { e8 = 0b000, e16 = 0b001, @@ -1265,7 +1317,7 @@ static Assembler::SEW elemtype_to_sew(BasicType etype) { #define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \ if (vill == 1) { \ - guarantee((vlmul | 
vsew | vsew | vta | vma == 0), \ + guarantee((vlmul | vsew | vta | vma == 0), \ "the other bits in vtype shall be zero"); \ } \ patch((address)&insn, lsb + 2, lsb, vlmul); \ @@ -1328,6 +1380,23 @@ static Assembler::SEW elemtype_to_sew(BasicType etype) { #undef patch_vtype +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + // Vector Configuration Instruction + INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + +#undef INSN + enum VectorMask { v0_t = 0b0, unmasked = 0b1 @@ -1443,25 +1512,6 @@ enum VectorMask { INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010); INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010); - // Vector Widening Floating-Point/Integer Type-Convert Instructions - INSN(vfwcvt_xu_f_v, 0b1010111, 0b001, 0b01000, 0b010010); - INSN(vfwcvt_x_f_v, 0b1010111, 0b001, 0b01001, 0b010010); - INSN(vfwcvt_f_xu_v, 0b1010111, 0b001, 0b01010, 0b010010); - INSN(vfwcvt_f_x_v, 0b1010111, 0b001, 0b01011, 0b010010); - INSN(vfwcvt_f_f_v, 0b1010111, 0b001, 0b01100, 0b010010); - INSN(vfwcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b01110, 0b010010); - INSN(vfwcvt_rtz_x_f_v, 0b1010111, 0b001, 0b01111, 0b010010); - - // Vector Narrowing Floating-Point/Integer Type-Convert Instructions - INSN(vfncvt_xu_f_w, 0b1010111, 0b001, 0b10000, 0b010010); - INSN(vfncvt_x_f_w, 0b1010111, 0b001, 0b10001, 0b010010); - INSN(vfncvt_f_xu_w, 0b1010111, 0b001, 0b10010, 0b010010); - INSN(vfncvt_f_x_w, 0b1010111, 0b001, 0b10011, 0b010010); - INSN(vfncvt_f_f_w, 0b1010111, 0b001, 0b10100, 0b010010); - INSN(vfncvt_rod_f_f_w, 0b1010111, 0b001, 0b10101, 0b010010); - INSN(vfncvt_rtz_xu_f_w, 0b1010111, 0b001, 0b10110, 0b010010); - INSN(vfncvt_rtz_x_f_w, 0b1010111, 0b001, 0b10111, 0b010010); - // Vector Floating-Point Instruction INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011); INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011); @@ -2251,21 +2301,1072 @@ enum Nf { #undef INSN #undef patch_VLdSt +// ==================================== +// RISC-V Bit-Manipulation Extension +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(add_uw, 0b0111011, 0b000, 0b0000100); + INSN(rol, 0b0110011, 0b001, 0b0110000); + INSN(rolw, 0b0111011, 0b001, 0b0110000); + INSN(ror, 0b0110011, 0b101, 0b0110000); + INSN(rorw, 0b0111011, 0b101, 0b0110000); + INSN(sh1add, 0b0110011, 0b010, 0b0010000); + INSN(sh2add, 0b0110011, 0b100, 0b0010000); + INSN(sh3add, 0b0110011, 0b110, 0b0010000); + INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000); + INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000); + INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000); + INSN(andn, 0b0110011, 0b111, 0b0100000); + INSN(orn, 0b0110011, 0b110, 0b0100000); + INSN(xnor, 0b0110011, 0b100, 0b0100000); + INSN(max, 0b0110011, 0b110, 0b0000101); + INSN(maxu, 0b0110011, 0b111, 0b0000101); + INSN(min, 0b0110011, 0b100, 0b0000101); + INSN(minu, 0b0110011, 0b101, 0b0000101); + +#undef INSN + +#define 
INSN(NAME, op, funct3, funct12) \ + void NAME(Register Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, funct12); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rev8, 0b0010011, 0b101, 0b011010111000); + INSN(sext_b, 0b0010011, 0b001, 0b011000000100); + INSN(sext_h, 0b0010011, 0b001, 0b011000000101); + INSN(zext_h, 0b0111011, 0b100, 0b000010000000); + INSN(clz, 0b0010011, 0b001, 0b011000000000); + INSN(clzw, 0b0011011, 0b001, 0b011000000000); + INSN(ctz, 0b0010011, 0b001, 0b011000000001); + INSN(ctzw, 0b0011011, 0b001, 0b011000000001); + INSN(cpop, 0b0010011, 0b001, 0b011000000010); + INSN(cpopw, 0b0011011, 0b001, 0b011000000010); + INSN(orc_b, 0b0010011, 0b101, 0b001010000111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); + +#undef INSN + +// ======================================== +// RISC-V Compressed Instructions Extension +// ======================================== +// Note: +// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be +// transformed to 16-bit instructions if compressible. +// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', +// but most of time we have no need to explicitly use these instructions. +// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range +// are qualified to be compressed with their 2-byte versions. +// An example: +// +// CompressibleRegion cr(_masm); +// __ andr(...); // this instruction could change to c.and if able to +// +// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from +// normal ones. +// + +private: + bool _in_compressible_region; +public: + bool in_compressible_region() const { return _in_compressible_region; } + void set_in_compressible_region(bool b) { _in_compressible_region = b; } +public: + + // a compressible region + class CompressibleRegion : public StackObj { + protected: + Assembler *_masm; + bool _saved_in_compressible_region; + public: + CompressibleRegion(Assembler *_masm) + : _masm(_masm) + , _saved_in_compressible_region(_masm->in_compressible_region()) { + _masm->set_in_compressible_region(true); + } + ~CompressibleRegion() { + _masm->set_in_compressible_region(_saved_in_compressible_region); + } + }; + + // patch a 16-bit instruction. 
+ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void c_patch(address a, unsigned bit, uint16_t val) { + c_patch(a, bit, bit, val); + } + + // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +// -------------- RVC Instruction Definitions -------------- + + void c_nop() { + c_addi(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi, 0b000, 0b01); + INSN(c_addiw, 0b001, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ + c_patch_reg((address)&insn, 7, sp); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi16sp, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi4spn, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + 
assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_slli, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_srli, 0b100, 0b00, 0b01); + INSN(c_srai, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_andi, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 6, 5, funct2); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(c_sub, 0b100011, 0b00, 0b01); + INSN(c_xor, 0b100011, 0b01, 0b01); + INSN(c_or, 0b100011, 0b10, 0b01); + INSN(c_and, 0b100011, 0b11, 0b01); + INSN(c_subw, 0b100111, 0b00, 0b01); + INSN(c_addw, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_mv, 0b1000, 0b10); + INSN(c_add, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, x0); \ + c_patch_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_jr, 0b1000, 0b10); + INSN(c_jalr, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } + } + + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } + } + +#define INSN(NAME, funct3, op) \ + void 
NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + c_j(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(c_j, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(c_beqz, 0b110, 0b01); + INSN(c_bnez, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lui, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_li, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & 
nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ldsp, 0b011, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(FloatRegister Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_fldsp, 0b001, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ld, 0b011, 0b00, Register); + INSN(c_sd, 0b111, 0b00, Register); + INSN(c_fld, 0b001, 0b00, FloatRegister); + INSN(c_fsd, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_sdsp, 0b111, 0b10, Register); + INSN(c_fsdsp, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_swsp, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lwsp, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + 
c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lw, 0b010, 0b00); + INSN(c_sw, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 11, 2, 0x0); \ + c_patch((address)&insn, 12, 12, 0b1); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ebreak, 0b100, 0b10); + +#undef INSN + +// -------------- RVC Transformation Functions -------------- + +// -------------------------- +// Register instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* add -> c.add */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + c_add(Rd, src); \ + return; \ + } \ + } \ + _add(Rd, Rs1, Rs2); \ + } + + INSN(add); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* sub/subw -> c.sub/c.subw */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ + C_NAME(Rd, Rs2); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(sub, c_sub, _sub); + INSN(subw, c_subw, _subw); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + C_NAME(Rd, src); \ + return; \ + } \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(andr, c_and, _andr); + INSN(orr, c_or, _orr); + INSN(xorr, c_xor, _xorr); + INSN(addw, c_addw, _addw); + +#undef INSN + +private: +// some helper functions + bool do_compress() const { + return UseRVC && in_compressible_region(); + } + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && \ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_c_ldsdsp, 0b111, 9); + FUNC(is_c_lwswsp, 0b011, 8); + +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_fldsdsp, 0b111, 9); + +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_ldsd, Register, 0b111, 8); + FUNC(is_c_lwsw, Register, 0b011, 7); + FUNC(is_c_fldsd, FloatRegister, 0b111, 8); + +#undef FUNC + +public: +// -------------------------- +// Load/store register +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* lw -> c.lwsp/c.lw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, 
offset, true)) { \
+        c_lwsp(Rd, offset); \
+        return; \
+      } else if (is_c_lwsw(Rs, Rd, offset)) { \
+        c_lw(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _lw(Rd, Rs, offset); \
+  }
+
+  INSN(lw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* ld -> c.ldsp/c.ld */ \
+    if (do_compress()) { \
+      if (is_c_ldsdsp(Rs, Rd, offset, true)) { \
+        c_ldsp(Rd, offset); \
+        return; \
+      } else if (is_c_ldsd(Rs, Rd, offset)) { \
+        c_ld(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _ld(Rd, Rs, offset); \
+  }
+
+  INSN(ld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \
+    /* fld -> c.fldsp/c.fld */ \
+    if (do_compress()) { \
+      if (is_c_fldsdsp(Rs, offset)) { \
+        c_fldsp(Rd, offset); \
+        return; \
+      } else if (is_c_fldsd(Rs, Rd, offset)) { \
+        c_fld(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _fld(Rd, Rs, offset); \
+  }
+
+  INSN(fld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* sd -> c.sdsp/c.sd */ \
+    if (do_compress()) { \
+      if (is_c_ldsdsp(Rs, Rd, offset, false)) { \
+        c_sdsp(Rd, offset); \
+        return; \
+      } else if (is_c_ldsd(Rs, Rd, offset)) { \
+        c_sd(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _sd(Rd, Rs, offset); \
+  }
+
+  INSN(sd);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* sw -> c.swsp/c.sw */ \
+    if (do_compress()) { \
+      if (is_c_lwswsp(Rs, Rd, offset, false)) { \
+        c_swsp(Rd, offset); \
+        return; \
+      } else if (is_c_lwsw(Rs, Rd, offset)) { \
+        c_sw(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _sw(Rd, Rs, offset); \
+  }
+
+  INSN(sw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \
+    /* fsd -> c.fsdsp/c.fsd */ \
+    if (do_compress()) { \
+      if (is_c_fldsdsp(Rs, offset)) { \
+        c_fsdsp(Rd, offset); \
+        return; \
+      } else if (is_c_fldsd(Rs, Rd, offset)) { \
+        c_fsd(Rd, Rs, offset); \
+        return; \
+      } \
+    } \
+    _fsd(Rd, Rs, offset); \
+  }
+
+  INSN(fsd);
+
+#undef INSN
+
+// --------------------------
+// Conditional branch instructions
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME) \
+  void NAME(Register Rs1, Register Rs2, const int64_t offset) { \
+    /* beq/bne -> c.beqz/c.bnez */ \
+    if (do_compress() && \
+        (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \
+         is_imm_in_range(offset, 8, 1))) { \
+      C_NAME(Rs1, offset); \
+      return; \
+    } \
+    NORMAL_NAME(Rs1, Rs2, offset); \
+  }
+
+  INSN(beq, c_beqz, _beq);
+  INSN(bne, c_bnez, _bne);
+
+#undef INSN
+
+// --------------------------
+// Unconditional branch instructions
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, const int32_t offset) { \
+    /* jal -> c.j */ \
+    if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \
+      c_j(offset); \
+      return; \
+    } \
+    _jal(Rd, offset); \
+  }
+
+  INSN(jal);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME) \
+  void NAME(Register Rd, Register Rs, const int32_t offset) { \
+    /* jalr -> c.jr/c.jalr */ \
+    if (do_compress() && (offset == 0 && Rs != x0)) { \
+      if (Rd == x1) { \
+        c_jalr(Rs); \
+        return; \
+      } else if (Rd == x0) { \
+        c_jr(Rs); \
+        return; \
+      } \
+    } \
+    _jalr(Rd, Rs, offset); \
+  }
+
+  INSN(jalr);
+
+#undef INSN
+
+// --------------------------
+//
Miscellaneous Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME() { \ + /* ebreak -> c.ebreak */ \ + if (do_compress()) { \ + c_ebreak(); \ + return; \ + } \ + _ebreak(); \ + } + + INSN(ebreak); + +#undef INSN + +#define INSN(NAME) \ + void NAME() { \ + /* The illegal instruction in RVC is presented by a 16-bit 0. */ \ + if (do_compress()) { \ + emit_int16(0); \ + return; \ + } \ + _halt(); \ + } + + INSN(halt); + +#undef INSN + +// -------------------------- +// Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int64_t imm) { \ + /* li -> c.li */ \ + if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ + c_li(Rd, imm); \ + return; \ + } \ + _li(Rd, imm); \ + } + + INSN(li); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ + if (do_compress()) { \ + if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ + c_addi(Rd, imm); \ + return; \ + } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ + c_mv(Rd, Rs1); \ + return; \ + } else if (Rs1 == sp && imm != 0) { \ + if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ + c_addi16sp(imm); \ + return; \ + } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ + c_addi4spn(Rd, imm); \ + return; \ + } \ + } \ + } \ + _addi(Rd, Rs1, imm); \ + } + + INSN(addi); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addiw -> c.addiw */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ + c_addiw(Rd, imm); \ + return; \ + } \ + _addiw(Rd, Rs1, imm); \ + } + + INSN(addiw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* and_imm12 -> c.andi */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ + c_andi(Rd, imm); \ + return; \ + } \ + _and_imm12(Rd, Rs1, imm); \ + } + + INSN(and_imm12); + +#undef INSN + +// -------------------------- +// Shift Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + /* slli -> c.slli */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ + c_slli(Rd, shamt); \ + return; \ + } \ + _slli(Rd, Rs1, shamt); \ + } + + INSN(slli); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + /* srai/srli -> c.srai/c.srli */ \ + if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ + C_NAME(Rd, shamt); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, shamt); \ + } + + INSN(srai, c_srai, _srai); + INSN(srli, c_srli, _srli); + +#undef INSN + +// -------------------------- +// Upper Immediate Instruction +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int32_t imm) { \ + /* lui -> c.lui */ \ + if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ + c_lui(Rd, imm); \ + return; \ + } \ + _lui(Rd, imm); \ + } + + INSN(lui); + +#undef INSN + // CSky specific Instruction // load into 2 registers, store 2 registers #define INSN(NAME, op, funct3, funct5) \ void NAME(Register Rd1, Register Rd2, Register Rs, const int32_t offset) { \ - 
guarantee(offset >= 0 && offset <= 3, "offset is invalid."); \ - unsigned insn = 0; \ + guarantee(offset >= 0 && offset <= 3, "offset is invalid."); \ + unsigned insn = 0; \ int32_t val = offset & 0x3; \ - patch((address)&insn, 6, 0, op); \ - patch_reg((address)&insn, 7, Rd1); \ - patch((address)&insn, 14, 12, funct3); \ - patch_reg((address)&insn, 15, Rs); \ - patch_reg((address)&insn, 20, Rd2); \ - patch((address)&insn, 26, 25, val); \ - patch((address)&insn, 31, 27, funct5); \ - emit(insn); \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd1); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 20, Rd2); \ + patch((address)&insn, 26, 25, val); \ + patch((address)&insn, 31, 27, funct5); \ + emit(insn); \ } INSN(ldd, 0b0001011, 0b100, 0b11111); INSN(lwd, 0b0001011, 0b100, 0b11100); @@ -2274,6 +3375,8 @@ enum Nf { INSN(swd, 0b0001011, 0b101, 0b11100); #undef INSN +// --------------------------------------------------------------------------------------- + void bgt(Register Rs, Register Rt, const address &dest); void ble(Register Rs, Register Rt, const address &dest); void bgtu(Register Rs, Register Rt, const address &dest); @@ -2299,7 +3402,11 @@ enum Nf { void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); - Assembler(CodeBuffer* code) : AbstractAssembler(code) { + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { } virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, @@ -2316,23 +3423,14 @@ enum Nf { return is_imm_in_range(imm, 12, 0); } - // The maximum range of a branch is fixed for the riscv64 - // architecture. + // The maximum range of a branch is fixed for the RISCV architecture. static const unsigned long branch_range = 1 * M; static bool reachable_from_branch_at(address branch, address target) { return uabs(target - branch) < branch_range; } - static Assembler::SEW elemBytes_to_sew(int esize) { - assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); - return (Assembler::SEW) exact_log2(esize); - } - virtual ~Assembler() {} - -#undef NORMAL -#undef COMPRESSED }; class BiasedLockingCounters; diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp index 0ac92413aae..f60e0e38ae8 100644 --- a/src/hotspot/cpu/riscv/bytes_riscv.hpp +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2016 SAP SE. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,8 +27,6 @@ #ifndef CPU_RISCV_BYTES_RISCV_HPP #define CPU_RISCV_BYTES_RISCV_HPP -#include "memory/allocation.hpp" - class Bytes: AllStatic { public: // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index 96aa18cf6f7..3779514d2fc 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,8 +39,7 @@ #define __ ce->masm()-> -void CounterOverflowStub::emit_code(LIR_Assembler* ce) -{ +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); Metadata *m = _method->as_constant_ptr()->as_metadata(); __ mov_metadata(t0, m); @@ -53,21 +52,18 @@ void CounterOverflowStub::emit_code(LIR_Assembler* ce) } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) - : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -{ + : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) - : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -{ + : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { assert(info != NULL, "must have info"); _info = new CodeEmitInfo(info); } -void RangeCheckStub::emit_code(LIR_Assembler* ce) -{ +void RangeCheckStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_info->deoptimize_on_exception()) { address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -92,20 +88,18 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) stub_id = Runtime1::throw_range_check_failed_id; } int32_t off = 0; - __ la_patchable(lr, RuntimeAddress(Runtime1::entry_for(stub_id)), off); - __ jalr(lr, lr, off); + __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); + __ jalr(ra, ra, off); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); } -PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -{ +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { _info = new CodeEmitInfo(info); } -void PredicateFailedStub::emit_code(LIR_Assembler* ce) -{ +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); __ far_call(RuntimeAddress(a)); @@ -114,8 +108,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void DivByZeroStub::emit_code(LIR_Assembler* ce) -{ +void DivByZeroStub::emit_code(LIR_Assembler* ce) { if (_offset != -1) { ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); } @@ -129,21 +122,19 @@ void DivByZeroStub::emit_code(LIR_Assembler* ce) } // Implementation of NewInstanceStub -NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* 
klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -{ +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { _result = result; _klass = klass; _klass_reg = klass_reg; _info = new CodeEmitInfo(info); - assert(stub_id == Runtime1::new_instance_id || - stub_id == Runtime1::fast_new_instance_id || + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || stub_id == Runtime1::fast_new_instance_init_check_id, "need new_instance id"); _stub_id = stub_id; } -void NewInstanceStub::emit_code(LIR_Assembler* ce) -{ +void NewInstanceStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); __ mv(x13, _klass_reg->as_register()); @@ -155,16 +146,14 @@ void NewInstanceStub::emit_code(LIR_Assembler* ce) } // Implementation of NewTypeArrayStub -NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -{ +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { _klass_reg = klass_reg; _length = length; _result = result; _info = new CodeEmitInfo(info); } -void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -{ +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); assert(_length->as_register() == x9, "length must in x9"); @@ -177,16 +166,14 @@ void NewTypeArrayStub::emit_code(LIR_Assembler* ce) } // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -{ +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); } -void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -{ +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); assert(_length->as_register() == x9, "length must in x9"); @@ -200,13 +187,11 @@ void NewObjectArrayStub::emit_code(LIR_Assembler* ce) // Implementation of MonitorAccessStubs MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -: MonitorAccessStub(obj_reg, lock_reg) -{ +: MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); } -void MonitorEnterStub::emit_code(LIR_Assembler* ce) -{ +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); ce->store_parameter(_obj_reg->as_register(), 1); @@ -223,8 +208,7 @@ void MonitorEnterStub::emit_code(LIR_Assembler* ce) __ j(_continuation); } -void MonitorExitStub::emit_code(LIR_Assembler* ce) -{ +void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); if (_compute_lock) { // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -238,7 +222,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) } else { exit_id = Runtime1::monitorexit_nofpu_id; } - __ la(lr, _continuation); + __ la(ra, _continuation); __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } @@ -247,18 +231,23 @@ void LoadKlassStub::emit_code(LIR_Assembler* ce) { Unimplemented(); } +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the 
stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; void PatchingStub::align_patch_site(MacroAssembler* masm) {} -// RISCV64 don't use C1 runtime patching. When need patch, just deoptimize. -void PatchingStub::emit_code(LIR_Assembler* ce) -{ - assert(false, "RISCV64 should not use C1 runtime patching"); +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); } -void DeoptimizeStub::emit_code(LIR_Assembler* ce) -{ +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); ce->store_parameter(_trap_request, 0); __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -266,8 +255,7 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) DEBUG_ONLY(__ should_not_reach_here()); } -void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -{ +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { address a = NULL; if (_info->deoptimize_on_exception()) { // Deoptimize, do not throw the exception, because it is probably wrong to do it here. @@ -284,8 +272,7 @@ void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -{ +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); @@ -299,14 +286,12 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) debug_only(__ should_not_reach_here()); } -void ArrayCopyStub::emit_code(LIR_Assembler* ce) -{ +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { // ---------------slow case: call to native----------------- __ bind(_entry); // Figure out where the args should go // This should really convert the IntrinsicID to the Method* and signature // but I don't know how to do that. 
- // const int args_num = 5; VMRegPair args[args_num]; BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; @@ -314,12 +299,11 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) // push parameters Register r[args_num]; - int i = 0; - r[i++] = src()->as_register(); - r[i++] = src_pos()->as_register(); - r[i++] = dst()->as_register(); - r[i++] = dst_pos()->as_register(); - r[i++] = length()->as_register(); + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); // next registers will get stored on the stack for (int j = 0; j < args_num; j++) { @@ -328,7 +312,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) int st_off = r_1->reg2stack() * wordSize; __ sd(r[j], Address(sp, st_off)); } else { - assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); + assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); } } @@ -348,8 +332,10 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) ce->add_call_info_here(info()); #ifndef PRODUCT - __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); - __ add_memory_int32(Address(t1), 1); + if (PrintC1Statistics) { + __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ add_memory_int32(Address(t1), 1); + } #endif __ j(_continuation); diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp index 550d6be22e1..4417ad63091 100644 --- a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -77,7 +76,7 @@ enum { // Encoding of float value in debug info. This is true on x86 where // floats are extended to doubles when stored in the stack, false for -// RISCV64 where floats and doubles are stored in their native form. +// RISCV where floats and doubles are stored in their native form. enum { pd_float_saved_as_double = false }; diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp index 657cf025d18..e3a2606c532 100644 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,4 +27,4 @@ // FpuStackSim //-------------------------------------------------------- -// No FPU stack on RISCV64 +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp index 5686156edcc..7bc3d311501 100644 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +26,7 @@ #ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP #define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP -// No FPU stack on RISCV64 +// No FPU stack on RISCV class FpuStackSim; #endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp index e7de3f39f2d..682ebe82627 100644 --- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,8 +29,7 @@ #include "runtime/sharedRuntime.hpp" #include "vmreg_riscv.inline.hpp" -LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -{ +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { LIR_Opr opr = LIR_OprFact::illegalOpr; VMReg r_1 = reg->first(); VMReg r_2 = reg->second(); @@ -231,7 +229,7 @@ void FrameMap::initialize() { // special register map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr - map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // lr + map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra map_register(i, x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread @@ -331,7 +329,7 @@ Address FrameMap::make_new_address(ByteSize sp_offset) const { // ----------------mapping----------------------- -// all mapping is based on rfp addressing, except for simple leaf methods where we access +// all mapping is based on fp addressing, except for simple leaf methods where we access // the locals sp based (and no frame is built) @@ -352,7 +350,7 @@ Address FrameMap::make_new_address(ByteSize sp_offset) const { // +----------+ // | ret addr | // +----------+ -// | args | <- RFP +// | args | <- FP // | .........| @@ -376,14 +374,13 @@ VMReg FrameMap::fpu_regname (int n) { return as_FloatRegister(n)->as_VMReg(); } -LIR_Opr FrameMap::stack_pointer() -{ +LIR_Opr FrameMap::stack_pointer() { return FrameMap::sp_opr; } // JSR 292 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { - return LIR_OprFact::illegalOpr; // Not needed on riscv64 + return 
LIR_OprFact::illegalOpr; // Not needed on riscv } bool FrameMap::validate_frame() { diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp index b1ff1afb660..01281f5c9e1 100644 --- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +26,7 @@ #ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP #define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP -// On RISCV64 the frame looks as follows: +// On RISCV the frame looks as follows: // // +-----------------------------+---------+----------------------------------------+----------------+----------- // | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp index 5c1ca282e04..2a99d49c94b 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -65,8 +64,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t1, t1, c - 1); } else { - __ slli(t1, t1, registerSize - shift); - __ srli(t1, t1, registerSize - shift); + __ zero_extend(t1, t1, shift); } __ subw(dreg, t1, t0); } @@ -80,8 +78,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t0, t0, c - 1); } else { - __ slli(t0, t0, registerSize - shift); - __ srli(t0, t0, registerSize - shift); + __ zero_extend(t0, t0, shift); } __ addw(dreg, t0, lreg); __ sraiw(dreg, dreg, shift); @@ -193,39 +190,37 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); break; case lir_div: - assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); if (c == 1) { // move lreg_lo to dreg if divisor is 1 __ mv(dreg, lreg_lo); } else { - unsigned int shift = exact_log2(c); + unsigned int shift = exact_log2_long(c); // use t0 as intermediate result register __ srai(t0, lreg_lo, 0x3f); if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t0, t0, c - 1); } else { - __ slli(t0, t0, registerSize - shift); - __ srli(t0, t0, registerSize - shift); + __ zero_extend(t0, t0, shift); } __ add(dreg, t0, lreg_lo); __ srai(dreg, dreg, shift); } break; case lir_rem: - assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); if (c == 1) { // move 0 to dreg if divisor is 1 __ mv(dreg, zr); } else { - unsigned int shift = exact_log2(c); + unsigned int shift = exact_log2_long(c); __ srai(t0, lreg_lo, 0x3f); __ srli(t0, t0, BitsPerLong - shift); __ add(t1, lreg_lo, t0); if (is_imm_in_range(c - 1, 12, 0)) { __ andi(t1, t1, c - 1); } else { - __ slli(t1, t1, registerSize - shift); - __ srli(t1, t1, registerSize - shift); + __ zero_extend(t1, t1, shift); } __ sub(dreg, t1, t0); } @@ -243,9 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig switch (code) { case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_mul_strictfp: // fall through + case lir_mul_strictfp: // fall through case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_div_strictfp: // fall through + case lir_div_strictfp: // fall through case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; default: ShouldNotReachHere(); @@ -258,9 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig switch (code) { case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_mul_strictfp: // fall through + case lir_mul_strictfp: // fall through case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_div_strictfp: // fall through + case lir_div_strictfp: // fall through case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; default: ShouldNotReachHere(); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp index b7a2cbf2912..ab0a9963fc1 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -23,6 +22,7 @@ * questions. 
* */ + #ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP #define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP @@ -32,4 +32,6 @@ void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); + #endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp index 7502fc2d161..b7f53e395f3 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -350,12 +349,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length, Register dst, Register dst_pos, BasicType basic_type) { int scale = array_element_size(basic_type); - __ slli(t0, src_pos, scale); - __ add(c_rarg0, src, t0); + __ shadd(c_rarg0, src_pos, src, t0, scale); __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg0, dst, dst_pos, length); - __ slli(t0, dst_pos, scale); - __ add(c_rarg1, dst, t0); + __ shadd(c_rarg1, dst_pos, dst, t0, scale); __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); assert_different_registers(c_rarg1, dst, length); __ mv(c_rarg2, length); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp index 1b9990e20c7..06a0f248ca6 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -26,6 +25,7 @@ #ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP #define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP + // arraycopy sub functions void generic_arraycopy(Register src, Register src_pos, Register length, Register dst, Register dst_pos, CodeStub *stub); @@ -48,4 +48,5 @@ Register dst, Register dst_pos); void arraycopy_load_args(Register src, Register src_pos, Register length, Register dst, Register dst_pos); + #endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index ce1dc4fc456..97e87aa85c5 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,8 +36,6 @@ #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" #include "code/compiledIC.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "nativeInst_riscv.hpp" #include "oops/objArrayKlass.hpp" @@ -100,25 +98,11 @@ LIR_Opr LIR_Assembler::osrBufferPointer() { return FrameMap::as_pointer_opr(receiverOpr()->as_register()); } -//--------------fpu register translations----------------------- -void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } - -void LIR_Assembler::reset_FPU() { Unimplemented(); } - -void LIR_Assembler::fpop() { Unimplemented(); } - -void LIR_Assembler::fxch(int i) { Unimplemented(); } - -void LIR_Assembler::fld(int i) { Unimplemented(); } - -void LIR_Assembler::ffree(int i) { Unimplemented(); } - void LIR_Assembler::breakpoint() { Unimplemented(); } void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -//------------------------------------------- static jlong as_long(LIR_Opr data) { jlong result; @@ -136,6 +120,43 @@ static jlong as_long(LIR_Opr data) { return result; } +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ movptr(tmp, addr->disp()); + return Address(tmp, 0); + } + + Register base = addr->base()->as_pointer_register(); + LIR_Opr index_opr = addr->index(); + + if (index_opr->is_illegal()) { + return Address(base, addr->disp()); + } + + int scale = addr->scale(); + if (index_opr->is_cpu_register()) { + Register index; + if (index_opr->is_single_cpu()) { + index = index_opr->as_register(); + } else { + index = index_opr->as_register_lo(); + } + if (scale != 0) { + __ shadd(tmp, index, base, tmp, scale); + } else { + __ add(tmp, base, index); + } + return Address(tmp, addr->disp()); + } else if (index_opr->is_constant()) { + intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); + return Address(base, addr_offset); + } + + Unimplemented(); + return Address(); +} + Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ShouldNotReachHere(); return Address(); @@ -640,8 +661,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po } } -void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool pop_fpu_stack, bool wide, bool /* unaligned */) { +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { LIR_Address* to_addr = dest->as_address_ptr(); // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src Register compressed_src = t1; @@ -763,8 +783,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { reg2stack(temp, dest, dest->type(), false); } -void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, - bool wide, bool /* unaligned */) { +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, 
CodeEmitInfo* info, bool wide, bool /* unaligned */) { assert(src->is_address(), "should not call otherwise"); assert(dest->is_register(), "should not call otherwise"); @@ -809,6 +828,9 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch __ ld(dest->as_register(), as_Address(from_addr)); break; case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { __ lwu(dest->as_register(), as_Address(from_addr)); } else { @@ -960,13 +982,13 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { case Bytecodes::_d2f: __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break; case Bytecodes::_i2c: - __ zero_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: char size + __ zero_extend(dest->as_register(), src->as_register(), 16); break; case Bytecodes::_i2l: __ addw(dest->as_register_lo(), src->as_register(), zr); break; case Bytecodes::_i2s: - __ sign_ext(dest->as_register(), src->as_register(), registerSize - 16); break; // 16: short size + __ sign_extend(dest->as_register(), src->as_register(), 16); break; case Bytecodes::_i2b: - __ sign_ext(dest->as_register(), src->as_register(), registerSize - 8); break; // 8: byte size + __ sign_extend(dest->as_register(), src->as_register(), 8); break; case Bytecodes::_l2i: _masm->block_comment("FIXME: This coulde be no-op"); __ addw(dest->as_register(), src->as_register_lo(), zr); break; @@ -1331,7 +1353,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } } -void LIR_Assembler::align_call(LIR_Code code) { } +void LIR_Assembler::align_call(LIR_Code code) { + // With RVC a call instruction may get 2-byte aligned. + // The address of the call instruction needs to be 4-byte aligned to + // ensure that it does not span a cache line so that it can be patched. + __ align(4); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -1351,10 +1378,14 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { add_call_info(code_offset(), op->info()); } -void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_static_call_stub() { address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); address stub = __ start_a_stub(call_stub_size()); if (stub == NULL) { bailout("static call stub overflow"); @@ -1366,7 +1397,8 @@ void LIR_Assembler::emit_static_call_stub() { __ relocate(static_stub_Relocation::spec(call_pc)); __ emit_static_call_stub(); - assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); + assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() + <= call_stub_size(), "stub too big"); __ end_a_stub(); } @@ -1668,8 +1700,7 @@ void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_kla } #endif // first time here. Set profile type. - // TODO: Fix this typo. See JDK-8267625. 
- __ ld(tmp, mdo_addr); + __ sd(tmp, mdo_addr); } else { assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); @@ -1774,30 +1805,33 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { - if (patch_code != lir_patch_none) { +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC && patch_code != lir_patch_none) { deoptimize_trap(info); return; } +#endif + assert(patch_code == lir_patch_none, "Patch code not supported"); LIR_Address* adr = addr->as_address_ptr(); Register dst = dest->as_register_lo(); assert_different_registers(dst, t0); - if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { - + if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { + int scale = adr->scale(); intptr_t offset = adr->disp(); LIR_Opr index_op = adr->index(); - int scale = adr->scale(); - if(index_op->is_constant()) { + if (index_op->is_constant()) { offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; } - if(!is_imm_in_range(offset, 12, 0)) { + if (!is_imm_in_range(offset, 12, 0)) { __ la(t0, as_Address(adr)); __ mv(dst, t0); return; } } + __ la(dst, as_Address(adr)); } @@ -1817,13 +1851,11 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg if (info != NULL) { add_call_info_here(info); } - __ ifence(); } void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { if (dest->is_address() || src->is_address()) { - move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, - /* unaligned */ false, /* wide */ false); + move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); } else { ShouldNotReachHere(); } @@ -1950,42 +1982,6 @@ int LIR_Assembler::array_element_size(BasicType type) const { return exact_log2(elem_size); } -Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { - if (addr->base()->is_illegal()) { - assert(addr->index()->is_illegal(), "must be illegal too"); - __ movptr(tmp, addr->disp()); - return Address(tmp, 0); - } - - Register base = addr->base()->as_pointer_register(); - LIR_Opr index_op = addr->index(); - int scale = addr->scale(); - - if (index_op->is_illegal()) { - return Address(base, addr->disp()); - } else if (index_op->is_cpu_register()) { - Register index; - if (index_op->is_single_cpu()) { - index = index_op->as_register(); - } else { - index = index_op->as_register_lo(); - } - if (scale != 0) { - __ slli(tmp, index, scale); - __ add(tmp, base, tmp); - } else { - __ add(tmp, base, index); - } - return Address(tmp, addr->disp()); - } else if (index_op->is_constant()) { - intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); - return Address(base, addr_offset); - } - - Unimplemented(); - return Address(); -} - // helper functions which checks for overflow and sets bailout if it // occurs. Always returns a valid embeddable pointer but in the // bailout case the pointer won't be to unique storage. 
@@ -2019,6 +2015,18 @@ address LIR_Assembler::int_constant(jlong n) { } } +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, Assembler::rl /* release */, t0, true /* result as bool */); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp index a8d58d7c7a2..5c81f1c704c 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -68,17 +68,16 @@ friend class ArrayCopyStub; void deoptimize_trap(CodeEmitInfo *info); - enum - { - // see emit_static_call_stub for detail: + enum { + // See emit_static_call_stub for detail // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) _call_stub_size = 14 * NativeInstruction::instruction_size + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), _call_aot_stub_size = 0, - // see emit_exception_handler for detail: + // See emit_exception_handler for detail // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller - // see emit_deopt_handler for detail + // See emit_deopt_handler for detail // auipc (1) + far_jump (6 or 2) _deopt_handler_size = 1 * NativeInstruction::instruction_size + 6 * NativeInstruction::instruction_size // or smaller diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp index 58aeec448b8..c41819fc2ae 100644 --- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -142,7 +142,6 @@ bool LIRGenerator::can_inline_as_constant(Value v) const { return false; } - bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { if (c->as_constant() != NULL) { long constant = 0; @@ -158,7 +157,6 @@ bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } - LIR_Opr LIRGenerator::safepoint_poll_register() { return LIR_OprFact::illegalOpr; } @@ -166,6 +164,7 @@ LIR_Opr LIRGenerator::safepoint_poll_register() { LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, int shift, int disp, BasicType type) { assert(base->is_register(), "must be"); + if (index->is_constant()) { LIR_Const *constant = index->as_constant_ptr(); jlong c; @@ -182,9 +181,9 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, __ move(index, tmp); return new LIR_Address(base, tmp, type); } - } else { - return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); } + + return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); } LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, @@ -192,28 +191,23 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); int elem_size = type2aelembytes(type); int shift = exact_log2(elem_size); - - LIR_Address* addr = NULL; - if (index_opr->is_constant()) { - addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); - } else { - if (index_opr->type() == T_INT) { - LIR_Opr tmp = new_register(T_LONG); - __ convert(Bytecodes::_i2l, index_opr, tmp); - index_opr = tmp; - } - addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); - } - return addr; + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); } LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; switch (type) { - case T_LONG: return LIR_OprFact::longConst(x); - case T_INT: return LIR_OprFact::intConst(x); - default: ShouldNotReachHere(); + case T_LONG: + r = LIR_OprFact::longConst(x); + break; + case T_INT: + r = LIR_OprFact::intConst(x); + break; + default: + ShouldNotReachHere(); + r = NULL; } - return NULL; + return r; } void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -283,10 +277,10 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); - // Need a tmp register for biased locking - LIR_Opr tmp = LIR_OprFact::illegalOpr; + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; if (UseBiasedLocking) { - tmp = new_register(T_INT); + scratch = new_register(T_INT); } CodeEmitInfo* info_for_exception = NULL; @@ -296,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, syncTempOpr(), tmp, + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, x->monitor_no(), info_for_exception, info); } @@ -380,7 +374,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { // missing test if instr is commutative and if we 
should swap - LIRItem left(x->x(), this); + LIRItem left(x->x(), this); LIRItem right(x->y(), this); if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -393,7 +387,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { // no need to do div-by-zero check if the divisor is a non-zero constant if (c != 0) { need_zero_check = false; } // do not load right if the divisor is a power-of-2 constant - if (c > 0 && is_power_of_2(c)) { + if (c > 0 && is_power_of_2_long(c)) { right.dont_load_item(); } else { right.load_item(); @@ -404,7 +398,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { if (need_zero_check) { CodeEmitInfo* info = state_for(x); __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); - __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); } rlock_result(x); @@ -561,7 +555,7 @@ void LIRGenerator::do_LogicOp(LogicOp* x) { left.load_item(); rlock_result(x); ValueTag tag = right.type()->tag(); - if(right.is_constant() && + if (right.is_constant() && ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { right.dont_load_item(); @@ -663,14 +657,22 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { value.load_item(); LIR_Opr dst = rlock_result(x); - if (x->id() == vmIntrinsics::_dsqrt) { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - } else { // vmIntrinsics::_dabs - __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + default: + ShouldNotReachHere(); } break; } - default: ShouldNotReachHere(); + default: + ShouldNotReachHere(); } } @@ -1088,9 +1090,5 @@ void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, CodeEmitInfo* info) { - if (!UseBarriersForVolatile) { - __ membar(); - } - __ volatile_load_mem_reg(address, result, info); } diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp index 3d0b6b063c2..78a61128bdd 100644 --- a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,5 +29,5 @@ #include "utilities/bitMap.inline.hpp" void LinearScan::allocate_fpu_stack() { - // No FPU stack on RISCV64 + // No FPU stack on RISCV } diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp index bec623f2ec6..d7ca7b0fd05 100644 --- a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,6 @@ inline int LinearScan::num_physical_regs(BasicType type) { return 1; } - inline bool LinearScan::requires_adjacent_regs(BasicType type) { return false; } @@ -58,8 +57,8 @@ inline bool LinearScan::is_caller_save(int assigned_reg) { return false; } - inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No special case behaviours yet } @@ -81,5 +80,4 @@ inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) return false; } - #endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 110118caab3..99d981f97f4 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "gc/shared/collectedHeap.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" -#include "oops/markOop.hpp" #include "runtime/basicLock.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/os.hpp" @@ -51,7 +50,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, } } -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { const int aligned_mask = BytesPerWord - 1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -64,8 +63,8 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); if (UseBiasedLocking) { - assert(tmp != noreg, "should have tmp register at this point"); - null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); } else { null_check_offset = offset(); } @@ -182,7 +181,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register } // preserves obj, destroys len_in_bytes -void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); Label done; @@ -194,7 +193,7 @@ void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int if (hdr_size_in_bytes) { add(obj, obj, hdr_size_in_bytes); } - zero_memory(obj, len_in_bytes, tmp1); + zero_memory(obj, len_in_bytes, tmp); if (hdr_size_in_bytes) { sub(obj, obj, hdr_size_in_bytes); } @@ -288,8 +287,7 @@ 
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1 const Register arr_size = tmp2; // okay to be the same // align object end mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); - slli(t0, len, f); - add(arr_size, arr_size, t0); + shadd(arr_size, len, arr_size, t0, f); andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case); @@ -320,14 +318,14 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we + // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a J, JAL or NOP. // Make it a NOP. nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. - // Note that we do this before doing an enter(). - + // Note that we do this before creating a frame. generate_stack_overflow_check(bang_size_in_bytes); MacroAssembler::build_frame(framesize); } @@ -338,21 +336,15 @@ void C1_MacroAssembler::remove_frame(int framesize) { void C1_MacroAssembler::verified_entry() { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a J, JAL or NOP. - // Make it a NOP. - - nop(); } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { - // fp + 0: link - // + 1: return address - // + 2: argument with offset 0 - // + 3: argument with offset 1 - // + 4: ... - ld(reg, Address(fp, (offset_in_words + 2) * BytesPerWord)); + // fp + -2: link + // + -1: return address + // + 0: argument with offset 0 + // + 1: argument with offset 1 + // + 2: ... + ld(reg, Address(fp, offset_in_words * BytesPerWord)); } #ifndef PRODUCT @@ -431,9 +423,9 @@ void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, L if (type == T_OBJECT || type == T_ARRAY) { assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); if (cmpFlag == lir_cond_equal) { - oop_beq(op1, op2, label, is_far); + beq(op1, op2, label, is_far); } else { - oop_bne(op1, op2, label, is_far); + bne(op1, op2, label, is_far); } } else { assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp index 4e4368bf6c2..1950cee5dd5 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,14 +42,14 @@ using MacroAssembler::null_check; void try_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time + int con_size_in_bytes, // object size in bytes if known at compile time Register tmp1, // temp register Register tmp2, // temp register Label& slow_case // continuation point if fast allocation fails ); void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); - void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); void float_cmp(bool is_float, int unordered_result, FloatRegister f0, FloatRegister f1, @@ -59,9 +59,9 @@ using MacroAssembler::null_check; // hdr : must be x10, contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved - // tmp : temporary register, contents destroyed + // scratch : scratch register, contents destroyed // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); // unlocking // hdr : contents destroyed diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index 8d8db47b71a..329df2e1ca7 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -83,7 +83,6 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres pop_reg(x10, sp); #endif reset_last_Java_frame(true); - ifence(); // check for pending exceptions { Label L; @@ -227,11 +226,11 @@ const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; // enum reg_save_layout { - reg_save_frame_size = 32 /* float */ + 32 /* integer */ + reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */ }; // Save off registers which might be killed by calls into the runtime. -// Tries to smart of about FP registers. In particular we separate +// Tries to smart of about FPU registers. In particular we separate // saving and describing the FPU registers for deoptimization since we // have to save the FPU registers twice if we describe them. The // deopt blob is the only thing which needs to describe FPU registers. @@ -248,11 +247,12 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { OopMap* oop_map = new OopMap(frame_size_in_slots, 0); assert_cond(oop_map != NULL); - // cpu_regs, caller save registers only, see FrameMap::initialize - // in c1_FrameMap_riscv64.cpp for detail. 
- const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, - x13, x14, x15, x16, x17, - x28, x29, x30, x31}; + // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. + const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { + x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 + }; + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { Register r = caller_save_cpu_regs[i]; int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -276,8 +276,8 @@ static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true) { __ block_comment("save_live_registers"); - // if the number of pushed regs is odd, zr will be added - __ push_reg(RegSet::range(x3, x31), sp); // integer registers except ra(x1) & sp(x2) + // if the number of pushed regs is odd, one slot will be reserved for alignment + __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) if (save_fpu_registers) { // float registers @@ -286,7 +286,7 @@ static OopMap* save_live_registers(StubAssembler* sasm, __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); } } else { - // we define reg_save_layout = 64 as the fixed frame size, + // we define reg_save_layout = 62 as the fixed frame size, // we should also sub 32 * wordSize to sp when save_fpu_registers == false __ addi(sp, sp, -32 * wordSize); } @@ -306,8 +306,8 @@ static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registe __ addi(sp, sp, 32 * wordSize); } - // if the number of popped regs is odd, zr will be added - __ pop_reg(RegSet::range(x3, x31), sp); // integer registers except ra(x1) & sp(x2) + // if the number of popped regs is odd, the reserved slot for alignment will be removed + __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) } static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) { @@ -322,10 +322,10 @@ static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_ __ addi(sp, sp, 32 * wordSize); } - // if the number of popped regs is odd, zr will be added - // integer registers except ra(x1) & sp(x2) & x10 - __ pop_reg(RegSet::range(x3, x9), sp); // pop zr, x3 ~ x9 - __ pop_reg(RegSet::range(x11, x31), sp); // pop x10 ~ x31, x10 will be loaded to zr + // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10 + // there is one reserved slot for alignment on the stack in save_live_registers(). 
+ __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment + __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here } void Runtime1::initialize_pd() { @@ -339,11 +339,10 @@ void Runtime1::initialize_pd() { sp_offset += step; } - // we save x0, x3 ~ x31, except x1, x2 - cpu_reg_save_offsets[0] = sp_offset; + // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg sp_offset += step; - // 3: loop starts from x3 - for (i = 3; i < FrameMap::nof_cpu_regs; i++) { + // we save x5 ~ x31, except x0 ~ x4: loop starts from x5 + for (i = 5; i < FrameMap::nof_cpu_regs; i++) { cpu_reg_save_offsets[i] = sp_offset; sp_offset += step; } @@ -397,7 +396,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ sd(zr, Address(xthread, Thread::pending_exception_offset())); // load issuing PC (the return address for this stub) into x13 - __ ld(exception_pc, Address(fp, 1 * BytesPerWord)); + __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); // make sure that the vm_results are cleared (may be unnecessary) __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); @@ -410,7 +409,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { break; case handle_exception_from_callee_id: { // At this point all registers except exception oop (x10) and - // exception pc (lr) are dead. + // exception pc (ra) are dead. const int frame_size = 2 /* fp, return address */; oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); sasm->set_frame_size(frame_size); @@ -448,7 +447,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset())); // patch throwing pc into return address (has bci & oop map) - __ sd(exception_pc, Address(fp, 1 * BytesPerWord)); + __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); // compute the exception handler. 
// the exception oop and the throwing pc are read from the fields in JavaThread @@ -464,7 +463,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ invalidate_registers(false, true, true, true, true, true); // patch the return address, this stub will directly return to the exception handler - __ sd(x10, Address(fp, 1 * BytesPerWord)); + __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord)); switch (id) { case forward_exception_id: @@ -478,7 +477,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ leave(); __ ret(); // jump to exception handler break; - default: ShouldNotReachHere(); + default: ShouldNotReachHere(); } return oop_maps; @@ -514,10 +513,10 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // save exception_oop __ addi(sp, sp, -2 * wordSize); __ sd(exception_oop, Address(sp, wordSize)); - __ sd(lr, Address(sp)); + __ sd(ra, Address(sp)); // search the exception handler address of the caller (using the return address) - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, lr); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra); // x10: exception handler address of the caller // Only x10 is valid at this time; all other registers have been @@ -528,11 +527,11 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { __ mv(handler_addr, x10); // get throwing pc (= return address). - // lr has been destroyed by the call - __ ld(lr, Address(sp)); + // ra has been destroyed by the call + __ ld(ra, Address(sp)); __ ld(exception_oop, Address(sp, wordSize)); __ addi(sp, sp, 2 * wordSize); - __ mv(x13, lr); + __ mv(x13, ra); __ verify_not_null_oop(exception_oop); @@ -581,17 +580,14 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { #endif __ reset_last_Java_frame(true); - __ ifence(); - // check for pending exceptions - { - Label L; + { Label L; __ ld(t0, Address(xthread, Thread::pending_exception_offset())); __ beqz(t0, L); // exception pending => remove activation and forward to exception handler { Label L1; - __ bnez(x10, L1); // have we deoptimized? + __ bnez(x10, L1); // have we deoptimized? __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); __ bind(L1); } @@ -649,6 +645,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { // Will reexecute. 
Proper return address is already on the stack we just restore // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); __ leave(); __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); @@ -794,8 +791,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { OopMap* map = save_live_registers(sasm); assert_cond(map != NULL); - const int bci_off = 2; - const int method_off = 3; + const int bci_off = 0; + const int method_off = 1; // Retrieve bci __ lw(bci, Address(fp, bci_off * BytesPerWord)); // And a pointer to the Method* @@ -859,8 +856,8 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ sll(arr_size, length, t0); int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; - __ slli(tmp1, tmp1, registerSize - lh_header_size_msb); - __ srli(tmp1, tmp1, registerSize - lh_header_size_width); + __ slli(tmp1, tmp1, XLEN - lh_header_size_msb); + __ srli(tmp1, tmp1, XLEN - lh_header_size_width); __ add(arr_size, arr_size, tmp1); __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp index f7fcbb13067..9316d4be02e 100644 --- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); define_pd_global(uintx, MetaspaceSize, 12*M ); define_pd_global(bool, NeverActAsServerClassMachine, true ); -define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); define_pd_global(bool, CICompileOSR, true ); #endif // !TIERED define_pd_global(bool, UseTypeProfile, false); @@ -67,6 +66,6 @@ define_pd_global(bool, RoundFPResults, true ); define_pd_global(bool, LIRFillDelaySlots, false); define_pd_global(bool, OptimizeSinglePrecision, true ); define_pd_global(bool, CSEArrayLength, false); -define_pd_global(bool, TwoOperandLIRForm, false ); +define_pd_global(bool, TwoOperandLIRForm, false); #endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp index 9955fe805a2..3da1f1c6d86 100644 --- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -51,7 +50,7 @@ define_pd_global(intx, FLOATPRESSURE, 32); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, INTPRESSURE, 24); -define_pd_global(intx, InteriorEntryAlignment, 4); +define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); define_pd_global(intx, LoopPercentProfileLimit, 10); @@ -77,7 +76,7 @@ define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); define_pd_global(intx, ProfiledCodeHeapSize, 22*M); define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinBlockLength, 6); define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); // Heap related flags @@ -86,6 +85,6 @@ define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); // Ergonomics related flags define_pd_global(bool, NeverActAsServerClassMachine, false); -define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. #endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp index 1144bac9e10..cdbd69807be 100644 --- a/src/hotspot/cpu/riscv/c2_init_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,7 +28,7 @@ #include "opto/compile.hpp" #include "opto/node.hpp" -// processor dependent initialization for riscv64 +// processor dependent initialization for riscv extern void reg_mask_init(); diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp index 0af09b57f31..a29e5be9dbb 100644 --- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp @@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { } void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { - address stub = find_stub(false); + address stub = find_stub(false /* is_aot */); guarantee(stub != NULL, "stub not found"); if (TraceICs) { @@ -118,6 +118,7 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad } void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); // Reset stub. address stub = static_stub->addr(); assert(stub != NULL, "stub not found"); @@ -134,12 +135,10 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ void CompiledDirectStaticCall::verify() { // Verify call. _call->verify(); - if (os::is_MP()) { - _call->verify_alignment(); - } + _call->verify_alignment(); // Verify stub. 
- address stub = find_stub(false); + address stub = find_stub(false /* is_aot */); assert(stub != NULL, "no stub found for static call"); // Creation also verifies the object. NativeMovConstReg* method_holder diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp index f68298f3db1..05da242e354 100644 --- a/src/hotspot/cpu/riscv/copy_riscv.hpp +++ b/src/hotspot/cpu/riscv/copy_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,8 +30,7 @@ // Inline functions for memory copy and fill. // Contains inline asm implementations -#include OS_CPU_HEADER(copy) - +#include OS_CPU_HEADER_INLINE(copy) static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { julong* to = (julong*) tohw; diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp index 18eba342cb7..e9ff307b647 100644 --- a/src/hotspot/cpu/riscv/depChecker_riscv.hpp +++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,9 +24,9 @@ * */ -#ifndef CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP -#define CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP +#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -// Nothing to do on riscv64 +// Nothing to do on riscv -#endif // CPU_RISCV64_VM_DEPCHECKER_RISCV64_HPP +#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp index d8c04b431ca..06bca5298cd 100644 --- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -15,7 +15,8 @@ * accompanied this code). * * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
* * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any @@ -26,32 +27,12 @@ #ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP #define CPU_RISCV_DISASSEMBLER_RISCV_HPP - static int pd_instruction_alignment() { - return 1; - } - - static const char* pd_cpu_opts() { - return NULL; - } - -// Returns address of n-th instruction preceding addr, -// NULL if no preceding instruction can be found. -// On (riscv64), we assume a constant instruction length. -// It might be beneficial to check "is_readable" as we do on ppc and s390. -static address find_prev_instr(address addr, int n_instr) { - return addr - Assembler::instruction_size * n_instr; +static int pd_instruction_alignment() { + return 1; } -// special-case instruction decoding. -// There may be cases where the binutils disassembler doesn't do -// the perfect job. In those cases, decode_instruction0 may kick in -// and do it right. -// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" -static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { - return here; +static const char* pd_cpu_opts() { + return ""; } -// platform-specific instruction annotations (like value of loaded constants) -static void annotate(address pc, outputStream* st) { }; - #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp index d489638a15f..d4fcbdcbbde 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.cpp +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,7 +29,6 @@ #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" -#include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/oop.inline.hpp" #include "prims/methodHandles.hpp" @@ -56,19 +55,19 @@ void RegisterMap::check_location_valid() { // Profiling/safepoint support bool frame::safe_for_sender(JavaThread *thread) { - address addr_sp = (address)_sp; - address addr_fp = (address)_fp; + address sp = (address)_sp; + address fp = (address)_fp; address unextended_sp = (address)_unextended_sp; // consider stack guards when trying to determine "safe" stack pointers static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
(JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; - assert_cond(thread != NULL); size_t usable_stack_size = thread->stack_size() - stack_guard_size; // sp must be within the usable part of the stack (not in guards) - bool sp_safe = (addr_sp < thread->stack_base()) && - (addr_sp >= thread->stack_base() - usable_stack_size); + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + if (!sp_safe) { return false; @@ -95,8 +94,7 @@ bool frame::safe_for_sender(JavaThread *thread) { // an fp must be within the stack and above (but not equal) sp // second evaluation on fp+ is added to handle situation where fp is -1 - bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && - (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); // We know sp/unextended_sp are safe only fp is questionable here @@ -104,7 +102,7 @@ bool frame::safe_for_sender(JavaThread *thread) { // to construct the sender and do some validation of it. This goes a long way // toward eliminating issues when we get in frame construction code - if (_cb != NULL ) { + if (_cb != NULL) { // First check if frame is complete and tester is reliable // Unfortunately we can only check frame complete for runtime stubs and nmethod @@ -139,14 +137,13 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } - sender_pc = (address) this->fp()[return_addr_offset]; + sender_pc = (address)this->fp()[return_addr_offset]; // for interpreted frames, the value below is the sender "raw" sp, // which can be different from the sender unextended sp (the sp seen // by the sender) because of current frame local variables sender_sp = (intptr_t*) addr_at(sender_sp_offset); sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; saved_fp = (intptr_t*) this->fp()[link_offset]; - } else { // must be some sort of compiled/runtime frame // fp does not have to be safe (although it could be check for c1?) @@ -161,10 +158,10 @@ bool frame::safe_for_sender(JavaThread *thread) { if ((address)sender_sp >= thread->stack_base()) { return false; } + sender_unextended_sp = sender_sp; - sender_pc = (address) *(sender_sp-1); - // Note: frame::sender_sp_offset is only valid for compiled frame - saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + sender_pc = (address) *(sender_sp - 1); + saved_fp = (intptr_t*) *(sender_sp - 2); } @@ -182,16 +179,14 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender - frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); return sender.is_interpreted_frame_valid(thread); - } // We must always be able to find a recognizable pc CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); - if (sender_pc == NULL || sender_blob == NULL) { + if (sender_pc == NULL || sender_blob == NULL) { return false; } @@ -219,7 +214,6 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender - frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); // Validate the JavaCallWrapper an entry frame must have @@ -240,7 +234,6 @@ bool frame::safe_for_sender(JavaThread *thread) { // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size // because the return address counts against the callee's frame. 
- if (sender_blob->frame_size() <= 0) { assert(!sender_blob->is_compiled(), "should count return address at least"); return false; @@ -250,7 +243,6 @@ bool frame::safe_for_sender(JavaThread *thread) { // code cache (current frame) is called by an entity within the code cache that entity // should not be anything but the call stub (already covered), the interpreter (already covered) // or an nmethod. - if (!sender_blob->is_compiled()) { return false; } @@ -266,20 +258,17 @@ bool frame::safe_for_sender(JavaThread *thread) { // Must be native-compiled frame. Since sender will try and use fp to find // linkages it must be safe - if (!fp_safe) { return false; } // Will the pc we fetch be non-zero (which we'll find at the oldest frame) - - if ((address) this->fp()[c_frame_return_addr_offset] == NULL) { return false; } + if ((address)this->fp()[return_addr_offset] == NULL) { return false; } return true; } void frame::patch_pc(Thread* thread, address pc) { - assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); address* pc_addr = &(((address*) sp())[-1]); if (TracePcPatching) { tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", @@ -289,6 +278,7 @@ void frame::patch_pc(Thread* thread, address pc) { // patch in the same address that's already there. assert(_pc == *pc_addr || pc == *pc_addr, "must be"); *pc_addr = pc; + _cb = CodeCache::find_blob(pc); address original_pc = CompiledMethod::get_deopt_original_pc(this); if (original_pc != NULL) { assert(original_pc == _pc, "expected original PC to be stored before patching"); @@ -395,7 +385,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp //------------------------------------------------------------------------------ // frame::adjust_unextended_sp void frame::adjust_unextended_sp() { - // On riscv64, sites calling method handle intrinsics and lambda forms are treated + // On riscv, sites calling method handle intrinsics and lambda forms are treated // as any other call site. Therefore, no special action is needed when we are // returning to any of these call sites. @@ -464,9 +454,9 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { intptr_t* unextended_sp = l_sender_sp; // the return_address is always the word on the stack - address sender_pc = (address) *(l_sender_sp-1); + address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); - intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); + intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); assert(map != NULL, "map must be set"); if (map->update_map()) { @@ -489,8 +479,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { } //------------------------------------------------------------------------------ -// frame::sender_raw -frame frame::sender_raw(RegisterMap* map) const { +// frame::sender +frame frame::sender(RegisterMap* map) const { // Default is we done have to follow them. 
The sender_for_xxx will // update it accordingly assert(map != NULL, "map must be set"); @@ -515,10 +505,6 @@ frame frame::sender_raw(RegisterMap* map) const { return frame(sender_sp(), link(), sender_pc()); } -frame frame::sender(RegisterMap* map) const { - return sender_raw(map); -} - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { assert(is_interpreted_frame(), "Not an interpreted frame"); // These are reasonable sanity checks @@ -540,13 +526,12 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { // do some validation of frame elements // first the method - Method* m = *interpreter_frame_method_addr(); - // validate the method we'd find in this potential sender if (!Method::is_valid_method(m)) { return false; } + // stack frames shouldn't be much larger than max_stack elements // this test requires the use of unextended_sp which is the sp as seen by // the current frame, and not sp which is the "raw" pc which could point @@ -557,7 +542,7 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { } // validate bci/bcx - address bcp = interpreter_frame_bcp(); + address bcp = interpreter_frame_bcp(); if (m->validate_bci_from_bcp(bcp) < 0) { return false; } @@ -567,12 +552,22 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { if (MetaspaceObj::is_valid(cp) == false) { return false; } + // validate locals - address locals = (address) *interpreter_frame_locals_addr(); + address locals = (address) *interpreter_frame_locals_addr(); + if (locals > thread->stack_base()) { + return false; + } - if (locals > thread->stack_base() || locals < (address) fp()) { + if (m->max_locals() > 0 && locals < (address) fp()) { + // fp in interpreter frame on RISC-V is higher than that on AArch64, + // pointing to sender_sp and sender_sp-2 relatively. + // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, + // pointing to sender_sp-1 (with one padding slot). + // So we verify the 'locals' pointer only if max_locals > 0. return false; } + // We'd have to be pretty unlucky to be mislead at this point return true; } @@ -652,7 +647,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) { #endif intptr_t *frame::initial_deoptimization_info() { - // Not used on riscv64, but we must return something. + // Not used on riscv, but we must return something. return NULL; } diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp index 7829a8b9f02..18e021dcb94 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -56,10 +55,10 @@ // [last esp ] = last_sp() last_sp_offset // [old stack pointer ] (sender_sp) sender_sp_offset -// [old frame pointer ] <- fp = link() +// [old frame pointer ] // [return pc ] -// [last sp ] +// [last sp ] <- fp = link() // [oop temp ] (only for native calls) // [padding ] (to preserve machine SP alignment) @@ -107,18 +106,14 @@ public: enum { pc_return_offset = 0, - // C frames - c_frame_link_offset = -2, - c_frame_return_addr_offset = -1, - c_frame_sender_sp_offset = 0, - // Java frames - link_offset = 0, - return_addr_offset = 1, - sender_sp_offset = 2, + // All frames + link_offset = -2, + return_addr_offset = -1, + sender_sp_offset = 0, // Interpreter frames - interpreter_frame_oop_temp_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 1, // for native calls only - interpreter_frame_sender_sp_offset = -1, + interpreter_frame_sender_sp_offset = -3, // outgoing sp before a call to an invoked method interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, @@ -136,8 +131,8 @@ // Entry frames // n.b. these values are determined by the layout defined in // stubGenerator for the Java call stub - entry_frame_after_call_words = 32, - entry_frame_call_wrapper_offset = -8, + entry_frame_after_call_words = 34, + entry_frame_call_wrapper_offset = -10, // we don't need a save area arg_reg_save_area_bytes = 0 @@ -190,12 +185,6 @@ inline address* sender_pc_addr() const; - // C frame methods - inline intptr_t* c_frame_link() const; - inline address* c_frame_sender_pc_addr() const; - inline address c_frame_sender_pc() const; - inline intptr_t* c_frame_sender_sp() const; - // expression stack tos if we are nested in a java call intptr_t* interpreter_frame_last_sp() const; @@ -207,7 +196,4 @@ static jint interpreter_frame_expression_stack_direction() { return -1; } - // returns the sending frame, without applying any barriers - frame sender_raw(RegisterMap* map) const; - #endif // CPU_RISCV_FRAME_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp index c9449c3254d..abd5bda7e49 100644 --- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,7 @@ #include "code/codeCache.hpp" #include "code/vmreg.inline.hpp" -// Inline functions for RISCV64 frames: +// Inline functions for RISCV frames: // Constructors: @@ -143,6 +143,11 @@ inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = (intptr_t **)addr_at(link_offset); + return os::is_readable_pointer(ptr) ? 
*ptr : NULL; +} + inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } // Return address @@ -150,12 +155,6 @@ inline address* frame::sender_pc_addr() const { return (address*) addr_at(re inline address frame::sender_pc() const { return *sender_pc_addr(); } inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } -// C frame methods -inline intptr_t* frame::c_frame_link() const { return (intptr_t*) *(intptr_t **)addr_at(c_frame_link_offset); } -inline address* frame::c_frame_sender_pc_addr() const { return (address*) addr_at(c_frame_return_addr_offset); } -inline address frame::c_frame_sender_pc() const { return *c_frame_sender_pc_addr(); } -inline intptr_t* frame::c_frame_sender_sp() const { return addr_at(c_frame_sender_sp_offset); } - inline intptr_t** frame::interpreter_frame_locals_addr() const { return (intptr_t**)addr_at(interpreter_frame_locals_offset); } @@ -233,21 +232,14 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { // Compiled frames inline oop frame::saved_oop_result(RegisterMap* map) const { oop* result_adr = (oop *)map->location(x10->as_VMReg()); - if(result_adr != NULL) { - return (*result_adr); - } else { - ShouldNotReachHere(); - return NULL; - } + guarantee(result_adr != NULL, "bad register save location"); + return (*result_adr); } inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { oop* result_adr = (oop *)map->location(x10->as_VMReg()); - if(result_adr != NULL) { - *result_adr = obj; - } else { - ShouldNotReachHere(); - } + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; } #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index 2a95e72c937..e191cbcee2a 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ j(done); __ bind(runtime); - // save the live input values - RegSet saved = RegSet::of(pre_val); - if (tosca_live) { saved += RegSet::of(x10); } - if (obj != noreg) { saved += RegSet::of(obj); } - - __ push_reg(saved, sp); + __ push_call_clobbered_registers(); if (expand_call) { assert(pre_val != c_rarg1, "smashed arg"); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); } else { __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); } - - __ pop_reg(saved, sp); + __ pop_call_clobbered_registers(); __ bind(done); @@ -196,6 +190,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, BarrierSet* bs = BarrierSet::barrier_set(); CardTableBarrierSet* ctbs = barrier_set_cast(bs); CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); Label done; Label runtime; @@ -213,6 +208,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, // storing region crossing non-NULL, is card already dirty? 
ExternalAddress cardtable((address) ct->byte_map_base()); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); const Register card_addr = tmp; __ srli(card_addr, store_addr, CardTable::card_shift); @@ -265,7 +261,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator bool on_reference = on_weak || on_phantom; ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); if (on_oop && on_reference) { - // LR is live. It must be saved around calls. + // RA is live. It must be saved around calls. __ enter(); // barrier may call runtime // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. @@ -338,8 +334,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), - false /* wide */, false /* unaligned */); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); } __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -420,6 +415,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* BarrierSet* bs = BarrierSet::barrier_set(); CardTableBarrierSet* ctbs = barrier_set_cast(bs); CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); Label done; Label runtime; @@ -432,8 +428,8 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); const Register card_offset = t1; - // LR is free here, so we can use it to hold the byte_map_base. - const Register byte_map_base = lr; + // RA is free here, so we can use it to hold the byte_map_base. 
+ const Register byte_map_base = ra; assert_different_registers(card_offset, byte_map_base, t0); @@ -464,8 +460,8 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* __ sub(t0, t0, wordSize); __ sd(t0, queue_index); - // Reuse LR to hold buffer_addr - const Register buffer_addr = lr; + // Reuse RA to hold buffer_addr + const Register buffer_addr = ra; __ ld(buffer_addr, buffer); __ add(t0, buffer_addr, t0); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp index 33a3856f882..37bc183f39c 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/modRefBarrierSetAssembler.hpp" @@ -75,4 +75,4 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { Register dst, Address src, Register tmp1, Register tmp_thread); }; -#endif // CPU_RISCV64_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp index 3568a54fac6..8735fd014ff 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp @@ -20,6 +20,7 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. + * */ #ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index f6721636d84..2b556b95d71 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,7 +27,6 @@ #include "classfile/classLoaderData.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetAssembler.hpp" -#include "gc/shared/barrierSetNMethod.hpp" #include "gc/shared/collectedHeap.hpp" #include "interpreter/interp_masm.hpp" #include "memory/universe.hpp" @@ -42,7 +41,7 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, Register dst, Address src, Register tmp1, Register tmp_thread) { assert_cond(masm != NULL); - // LR is live. It must be saved around calls. + // RA is live. It must be saved around calls. 
bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; @@ -176,16 +175,24 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, } else { Register end = tmp1; Label retry; - int32_t offset = 0; __ bind(retry); - Register tmp = t0; + // Get the current end of the heap + ExternalAddress address_end((address) Universe::heap()->end_addr()); + { + int32_t offset; + __ la_patchable(t1, address_end, offset); + __ ld(t1, Address(t1, offset)); + } // Get the current top of the heap ExternalAddress address_top((address) Universe::heap()->top_addr()); - __ la_patchable(tmp, address_top, offset); - __ addi(tmp, tmp, offset); - __ lr_d(obj, tmp, Assembler::aqrl); + { + int32_t offset; + __ la_patchable(t0, address_top, offset); + __ addi(t0, t0, offset); + __ lr_d(obj, t0, Assembler::aqrl); + } // Adjust it my the size of our new object if (var_size_in_bytes == noreg) { @@ -197,18 +204,12 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, // if end < obj then we wrapped around high memory __ bltu(end, obj, slow_case, is_far); - Register heap_end = t1; - // Get the current end of the heap - ExternalAddress address_end((address) Universe::heap()->end_addr()); - offset = 0; - __ la_patchable(heap_end, address_end, offset); - __ ld(heap_end, Address(heap_end, offset)); - - __ bgtu(end, heap_end, slow_case, is_far); + __ bgtu(end, t1, slow_case, is_far); // If heap_top hasn't been changed by some other thread, update it. - __ sc_d(t1, end, tmp, Assembler::rl); + __ sc_d(t1, end, t0, Assembler::rl); __ bnez(t1, retry); + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); } } diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp index d0ab14d2aff..984d94f4c3d 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -23,12 +23,11 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetNMethod.hpp" #include "memory/allocation.hpp" #include "oops/access.hpp" @@ -74,4 +73,4 @@ class BarrierSetAssembler: public CHeapObj { virtual ~BarrierSetAssembler() {} }; -#endif // CPU_RISCV64_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp index ef51661e0db..81d47d61d4c 100644 --- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -41,6 +41,10 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob BarrierSet* bs = BarrierSet::barrier_set(); assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + __ srli(obj, obj, CardTable::card_shift); assert(CardTable::dirty_card_val() == 0, "must be"); @@ -56,6 +60,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob __ sb(zr, Address(tmp)); __ bind(L_already_dirty); } else { + if (ct->scanned_concurrently()) { + __ membar(MacroAssembler::StoreStore); + } __ sb(zr, Address(tmp)); } } @@ -66,12 +73,16 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl assert_different_registers(start, tmp); assert_different_registers(count, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + Label L_loop, L_done; const Register end = count; __ beqz(count, L_done); // zero count - nothing to do - __ slli(count, count, LogBytesPerHeapOop); - __ add(end, start, count); // end = start + count << LogBytesPerHeapOop + // end = start + count << LogBytesPerHeapOop + __ shadd(end, count, start, count, LogBytesPerHeapOop); __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive __ srli(start, start, CardTable::card_shift); @@ -80,6 +91,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl __ load_byte_map_base(tmp); __ add(start, start, tmp); + if (ct->scanned_concurrently()) { + __ membar(MacroAssembler::StoreStore); + } __ bind(L_loop); __ add(tmp, start, count); diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp index a9e8cf09fcb..686fe8fa478 100644 --- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -#define CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/modRefBarrierSetAssembler.hpp" @@ -37,7 +37,6 @@ class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { Register start, Register count, Register tmp, RegSet saved_regs); virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2); - }; -#endif // #ifndef CPU_RISCV64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp index 52c1e011088..00419c3163c 100644 --- a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -23,8 +23,8 @@ * */ -#ifndef CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -#define 
CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" @@ -52,4 +52,4 @@ class ModRefBarrierSetAssembler: public BarrierSetAssembler { Address dst, Register val, Register tmp1, Register tmp2); }; -#endif // CPU_RISCV64_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp index b2edba5f558..d19f5b859ce 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" +#include "gc/shared/gc_globals.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" @@ -50,15 +51,7 @@ void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { } ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, - /* release */ Assembler::rl, /* is_cae */ false, result); - if (UseBarriersForVolatile) { - // The membar here is necessary to prevent reordering between the - // release store in the CAS above and a subsequent volatile load. - // However for !UseBarriersForVolatile, C1 inserts a full barrier before - // volatile loads which means we don't need an additional barrier - // here (see LIRGenerator::volatile_field_load()). - __ membar(MacroAssembler::AnyAny); - } + /* release */ Assembler::rl, /* is_cae */ false, result); } #undef __ diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp index e75e2d1bfdf..b8534c52e77 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -50,8 +50,8 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec Register src, Register dst, Register count, RegSet saved_regs) { if (is_oop) { bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; - if ((ShenandoahSATBBarrier && !dest_uninitialized) || - ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + Label done; // Avoid calling runtime if count == 0 @@ -118,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); // Is marking active? 
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { __ lwu(tmp, in_progress); } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); __ lbu(tmp, in_progress); } __ beqz(tmp, done); @@ -201,7 +201,7 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb // - Test lowest two bits == 0 // - If so, set the lowest two bits // - Invert the result back, and copy to dst - RegSet savedRegs = RegSet::of(t2); + RegSet saved_regs = RegSet::of(t2); bool borrow_reg = (tmp == noreg); if (borrow_reg) { // No free registers available. Make one useful. @@ -209,11 +209,11 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb if (tmp == dst) { tmp = t1; } - savedRegs += RegSet::of(tmp); + saved_regs += RegSet::of(tmp); } assert_different_registers(tmp, dst, t2); - __ push_reg(savedRegs, sp); + __ push_reg(saved_regs, sp); Label done; __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); @@ -224,11 +224,12 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 __ bind(done); - __ pop_reg(savedRegs, sp); + __ pop_reg(saved_regs, sp); } void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, - Register dst, Address load_addr) { + Register dst, + Address load_addr) { assert(ShenandoahLoadRefBarrier, "Should be enabled"); assert(dst != t1 && load_addr.base() != t1, "need t1"); assert_different_registers(load_addr.base(), t0, t1); @@ -250,15 +251,15 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl } // Save x10 and x11, unless it is an output register - RegSet to_save = RegSet::of(x10, x11) - result_dst; - __ push_reg(to_save, sp); + RegSet saved_regs = RegSet::of(x10, x11) - result_dst; + __ push_reg(saved_regs, sp); __ la(x11, load_addr); __ mv(x10, dst); __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); __ mv(result_dst, x10); - __ pop_reg(to_save, sp); + __ pop_reg(saved_regs, sp); __ bind(done); __ leave(); @@ -267,7 +268,9 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { if (ShenandoahIUBarrier) { __ push_call_clobbered_registers(); + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + __ pop_call_clobbered_registers(); } } @@ -311,16 +314,14 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, // 2: load a reference from src location and apply LRB if needed if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { - guarantee(dst != x30 && src.base() != x30, "load_at need x30"); - bool ist5 = (dst == src.base()); - if (ist5) { - __ push_reg(RegSet::of(x30), sp); - } Register result_dst = dst; // Preserve src location for LRB + RegSet saved_regs; if (dst == src.base()) { - dst = x30; + dst = (src.base() == x28) ? 
x29 : x28; + saved_regs = RegSet::of(dst); + __ push_reg(saved_regs, sp); } assert_different_registers(dst, src.base()); @@ -333,8 +334,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, dst = result_dst; } - if (ist5) { - __ pop_reg(RegSet::of(x30), sp); + if (saved_regs.bits() != 0) { + __ pop_reg(saved_regs, sp); } } else { BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); @@ -432,39 +433,10 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler // from-space, or it refers to the to-space version of an object that // is being evacuated out of from-space. // -// By default, this operation implements sequential consistency and the -// value held in the result register following execution of the -// generated code sequence is 0 to indicate failure of CAS, non-zero -// to indicate success. Arguments support variations on this theme: -// -// acquire: Allow relaxation of the memory ordering on CAS from -// sequential consistency. This can be useful when -// sequential consistency is not required, such as when -// another sequentially consistent operation is already -// present in the execution stream. If acquire, successful -// execution has the side effect of assuring that memory -// values updated by other threads and "released" will be -// visible to any read operations perfomed by this thread -// which follow this operation in program order. This is a -// special optimization that should not be enabled by default. -// release: Allow relaxation of the memory ordering on CAS from -// sequential consistency. This can be useful when -// sequential consistency is not required, such as when -// another sequentially consistent operation is already -// present in the execution stream. If release, successful -// completion of this operation has the side effect of -// assuring that all writes to memory performed by this -// thread that precede this operation in program order are -// visible to all other threads that subsequently "acquire" -// before reading the respective memory values. This is a -// special optimization that should not be enabled by default. -// is_cae: This turns CAS (compare and swap) into CAE (compare and -// exchange). This HotSpot convention is that CAE makes -// available to the caller the "failure witness", which is -// the value that was stored in memory which did not match -// the expected value. If is_cae, the result is the value -// most recently fetched from addr rather than a boolean -// success indicator. +// By default the value held in the result register following execution +// of the generated code sequence is 0 to indicate failure of CAS, +// non-zero to indicate success. If is_cae, the result is the value most +// recently fetched from addr rather than a boolean success indicator. 
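As a rough illustration of the result convention described in the rewritten comment above, a caller could interpret the two modes as sketched below. The helper names and types here are hypothetical and not part of the patch; only the 0-versus-non-zero rule and the "value most recently fetched from addr" rule come from the comment, and the equality check for the CAE case is the usual reading of that convention rather than something the patch states.

    #include <cstdint>

    // Sketch only: interpreting cmpxchg_oop's result under the two conventions
    // documented above. `expected` is the compare value handed to the CAS.
    static bool cas_succeeded(intptr_t result) {
      return result != 0;                    // plain CAS: 0 = failure, non-zero = success
    }
    static bool cae_succeeded(intptr_t result, intptr_t expected) {
      return result == expected;             // CAE: result is the value fetched from addr;
    }                                        // it matches `expected` only on success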
// // Clobbers t0, t1 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, @@ -547,8 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen Register pre_val_reg = stub->pre_val()->as_register(); if (stub->do_load()) { - ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), - stub->info(), false /* wide */, false /* unaligned */); + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); } __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -660,12 +631,13 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s __ push_call_clobbered_registers(); __ load_parameter(0, x10); __ load_parameter(1, x11); + if (UseCompressedOops) { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); } else { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); } - __ jalr(lr); + __ jalr(ra); __ mv(t0, x10); __ pop_call_clobbered_registers(); __ mv(x10, t0); @@ -714,11 +686,11 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator __ push_call_clobbered_registers(); if (UseCompressedOops) { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); } else { - __ mv(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); } - __ jalr(lr); + __ jalr(ra); __ mv(t0, x10); __ pop_call_clobbered_registers(); __ mv(x10, t0); diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp index 9bd95227fd1..5d75035e9d4 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp @@ -28,6 +28,7 @@ #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" #ifdef COMPILER1 class LIR_Assembler; class ShenandoahPreBarrierStub; @@ -37,31 +38,6 @@ class StubAssembler; class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -public: - static address shenandoah_lrb(); - - void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); - -#ifdef COMPILER1 - void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); - void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); - void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); - void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -#endif - - virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, - Register src, Register dst, Register count, RegSet saved_regs); - virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Register dst, Address src, Register tmp1, Register tmp_thread); - virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, 
Register val, Register tmp1, Register tmp2); - virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, - Register obj, Register tmp, Label& slowpath); - virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); - - virtual void barrier_stubs_init(); - private: static address _shenandoah_lrb; @@ -87,6 +63,35 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); address generate_shenandoah_lrb(StubCodeGenerator* cgen); + +public: + + static address shenandoah_lrb(); + + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); +#endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); + + virtual void barrier_stubs_init(); }; #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad index 36855c3f9b8..bab407a8b76 100644 --- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad @@ -71,7 +71,7 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i %} instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); @@ -93,7 +93,7 @@ instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval %} instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); @@ -118,9 +118,11 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ins_cost(10 * DEFAULT_COST); effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then 
$mem <-- $newval, #@compareAndExchangeN_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -128,6 +130,7 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, true /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -139,6 +142,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva format %{ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -146,6 +150,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, true /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -158,6 +163,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -166,6 +172,7 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, false /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} @@ -177,6 +184,7 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva format %{ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" %} + ins_encode %{ Register tmp = $tmp$$Register; __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -184,5 +192,6 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva Assembler::relaxed /* acquire */, Assembler::rl /* release */, false /* is_cae */, $res$$Register); %} + ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp index 20e5a20ee42..d7f261af3ff 100644 --- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,12 +33,8 @@ const int StackAlignmentInBytes = 16; // 32-bit integer argument values are extended to 64 bits. const bool CCallingConventionRequiresIntsAsLongs = false; -// RISC-V has adopted a multicopy atomic model closely following -// that of ARMv8. -#define CPU_MULTI_COPY_ATOMIC - -// We treat concurrent modification and execution of instructions -// conservatively on RISC-V, just like it was done in aarch64 port. +// To be safe, we deoptimize when we come across an access that needs +// patching. This is similar to what is done on aarch64. 
#define DEOPTIMIZE_WHEN_PATCHING #define SUPPORTS_NATIVE_CX8 @@ -47,6 +43,10 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define THREAD_LOCAL_POLL -#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS true +#if INCLUDE_JVMCI +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS (EnableJVMCI || UseAOT) +#else +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +#endif #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index f0743a556b0..2ddb9e62feb 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,7 +39,7 @@ define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, CodeEntryAlignment, 64); define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); @@ -83,34 +82,31 @@ define_pd_global(bool, ThreadLocalHandshakes, true); define_pd_global(intx, InlineSmallCode, 1000); -#define ARCH_FLAGS(develop, \ - product, \ - diagnostic, \ - experimental, \ - notproduct, \ - range, \ - constraint, \ - writeable) \ - \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ - product(bool, UseBarriersForVolatile, false, \ - "Use memory barriers to implement volatile accesses") \ - product(bool, UseCRC32, false, \ - "Use CRC32 instructions for CRC32 computation") \ - product(bool, UseBlockZeroing, true, \ - "Use DC ZVA for block zeroing") \ - product(intx, BlockZeroingLowLimit, 256, \ - "Minimum size in bytes when block zeroing will be used") \ - range(1, max_jint) \ - product(bool, TraceTraps, false, "Trace all traps the signal handler")\ - product(bool, UseConservativeFence, true, \ - "Extend i for r and o for w in the pred/succ flags of fence;" \ - "Extend fence.i to fence.i + fence.") \ - product(bool, AvoidUnalignedAccesses, true, \ - "Avoid generating unaligned memory accesses") \ - product(bool, UseRVV, false, "Use RVV instructions") \ - product(bool, UseRVV071, false, "Use RVV 0.7.1 instructions") \ - product(bool, UseCSky, false, "Use CSky specific instructions") \ +#define ARCH_FLAGS(develop, \ + product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ + product(bool, UseConservativeFence, true, \ + "Extend i for r and o for w in the pred/succ flags of fence;" \ + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ + experimental(bool, UseRVV, false, "Use RVV instructions") \ + experimental(bool, UseRVV071, false, "Use RVV 0.7.1 instructions") \ + experimental(bool, UseRVB, false, "Use RVB instructions") \ + experimental(bool, UseRVC, false, "Use RVC instructions") \ + product(bool, UseCSky, false, "Use CSky specific instructions") \ #endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp index 908f610cd75..cc93103dc55 100644 --- a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ int InlineCacheBuffer::ic_stub_code_size() { // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) - // 5: auipc + ld + j + address(2 * instruction_size ) + // 5: auipc + ld + j + address(2 * instruction_size) return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; } @@ -47,7 +47,7 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached ResourceMark rm; CodeBuffer code(code_begin, ic_stub_code_size()); MacroAssembler* masm = new MacroAssembler(&code); - // note: even though the code contains an embedded value, we do not need reloc info + // Note: even though the code contains an embedded value, we do not need reloc info // because // (1) the value is old (i.e., doesn't matter for scavenges) // (2) these ICStubs are removed *before* a GC happens, so the roots disappear diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp index 7faa0d9d32b..922a80f9f3e 100644 --- a/src/hotspot/cpu/riscv/icache_riscv.cpp +++ b/src/hotspot/cpu/riscv/icache_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,9 +35,7 @@ static int icache_flush(address addr, int lines, int magic) { } void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { - address start = (address)icache_flush; - *flush_icache_stub = (ICache::flush_icache_stub_t)start; // ICache::invalidate_range() contains explicit condition that the first diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp index ebdc4e56a4c..5bf40ca8204 100644 --- a/src/hotspot/cpu/riscv/icache_riscv.hpp +++ b/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -26,14 +26,14 @@ #ifndef CPU_RISCV_ICACHE_RISCV_HPP #define CPU_RISCV_ICACHE_RISCV_HPP -// Interface for updating the instruction cache. 
Whenever the VM +// Interface for updating the instruction cache. Whenever the VM // modifies code, part of the processor instruction cache potentially // has to be flushed. class ICache : public AbstractICache { public: enum { - stub_size = 16, // Size of the icache flush stub in bytes + stub_size = 16, // Size of the icache flush stub in bytes line_size = BytesPerWord, // conservative log2_line_size = LogBytesPerWord // log2(line_size) }; diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index d227397cafb..b50be7e726c 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" -#include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" #include "prims/jvmtiExport.hpp" @@ -67,17 +66,17 @@ void InterpreterMacroAssembler::narrow(Register result) { bind(notBool); mv(t1, T_BYTE); bne(t0, t1, notByte); - sign_ext(result, result, registerSize - 8); + sign_extend(result, result, 8); j(done); bind(notByte); mv(t1, T_CHAR); bne(t0, t1, notChar); - zero_ext(result, result, registerSize - 16); // turncate upper 48 bits + zero_extend(result, result, 16); j(done); bind(notChar); - sign_ext(result, result, registerSize - 16); // sign-extend short + sign_extend(result, result, 16); // Nothing to do for T_INT bind(done); @@ -178,7 +177,7 @@ void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); lhu(reg, Address(xbcp, bcp_offset)); - grev16(reg, reg); + revb_h(reg, reg); } void InterpreterMacroAssembler::get_dispatch() { @@ -223,13 +222,12 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, assert_different_registers(cache, xcpool); get_cache_index_at_bcp(index, bcp_offset, index_size); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); - // convert from field index to ConstantPoolCacheEntry - // riscv64 already has the cache in xcpool so there is no need to - // install it in cache. instead we pre-add the indexed offset to + // Convert from field index to ConstantPoolCacheEntry + // riscv already has the cache in xcpool so there is no need to + // install it in cache. Instead we pre-add the indexed offset to // xcpool and return it in cache. All clients of this method need to // be modified accordingly. 
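Several of the interpreter hunks that follow replace explicit shift-then-add pairs with a single shadd macro. Judging purely from the substitutions in this patch (for example, slli(cache, index, 5); add(cache, xcpool, cache) becoming shadd(cache, index, xcpool, cache, 5)), the macro appears to compute Rd = Rs2 + (Rs1 << shift), with the fourth argument available as scratch. A hypothetical C++ equivalent of that arithmetic, inferred from those replacements rather than taken from the macro's definition, would be:

    #include <cstdint>

    // Sketch of the apparent shadd(Rd, Rs1, Rs2, tmp, shift) semantics,
    // inferred from the slli+add sequences it replaces in this patch.
    static intptr_t shadd_equiv(intptr_t rs1, intptr_t rs2, unsigned shift) {
      return rs2 + (rs1 << shift);           // scaled index plus base
    }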
- slli(cache, index, 5); - add(cache, xcpool, cache); + shadd(cache, index, xcpool, cache, 5); } @@ -250,8 +248,8 @@ void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register lwu(bytecode, bytecode); membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); const int shift_count = (1 + byte_no) * BitsPerByte; - slli(bytecode, bytecode, registerSize - (shift_count + BitsPerByte)); - srli(bytecode, bytecode, registerSize - BitsPerByte); + slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); + srli(bytecode, bytecode, XLEN - BitsPerByte); } void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, @@ -261,14 +259,15 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, assert(cache != tmp, "must use different register"); get_cache_index_at_bcp(tmp, bcp_offset, index_size); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); - // convert from field index to ConstantPoolCacheEntry index + // Convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset - assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, + "else change next line"); ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); // skip past the header add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); - slli(tmp, tmp, 2 + LogBytesPerWord); - add(cache, cache, tmp); // construct pointer to cache entry + // construct pointer to cache entry + shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); } // Load object from cpool->resolved_references(index) @@ -277,25 +276,22 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index( assert_different_registers(result, index); get_constant_pool(result); - // load pointer for resolved_references[] objArray + // Load pointer for resolved_references[] objArray ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); resolve_oop_handle(result, tmp); // Add in the index addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - slli(index, index, LogBytesPerHeapOop); - add(result, result, index); + shadd(result, index, result, index, LogBytesPerHeapOop); load_heap_oop(result, Address(result, 0)); } void InterpreterMacroAssembler::load_resolved_klass_at_offset( Register cpool, Register index, Register klass, Register temp) { - slli(temp, index, LogBytesPerWord); - add(temp, temp, cpool); + shadd(temp, index, cpool, temp, LogBytesPerWord); lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses - slli(temp, temp, LogBytesPerWord); - add(klass, klass, temp); + shadd(klass, temp, klass, temp, LogBytesPerWord); ld(klass, Address(klass, Array::base_offset_in_bytes())); } @@ -507,23 +503,21 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, Label safepoint; address* const safepoint_table = Interpreter::safept_table(state); bool needs_thread_local_poll = generate_poll && - SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; if (needs_thread_local_poll) { NOT_PRODUCT(block_comment("Thread-local 
Safepoint poll")); ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); + andi(t1, t1, SafepointMechanism::poll_bit()); bnez(t1, safepoint); } if (table == Interpreter::dispatch_table(state)) { li(t1, Interpreter::distance_from_dispatch_table(state)); add(t1, Rs, t1); - slli(t1, t1, 3); - add(t1, xdispatch, t1); + shadd(t1, t1, xdispatch, t1, 3); } else { mv(t1, (address)table); - slli(Rs, Rs, 3); - add(t1, t1, Rs); + shadd(t1, Rs, t1, Rs, 3); } ld(t1, Address(t1)); jr(t1); @@ -531,8 +525,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, if (needs_thread_local_poll) { bind(safepoint); la(t1, ExternalAddress((address)safepoint_table)); - slli(Rs, Rs, 3); - add(t1, t1, Rs); + shadd(t1, Rs, t1, Rs, 3); ld(t1, Address(t1)); jr(t1); } @@ -581,7 +574,7 @@ void InterpreterMacroAssembler::remove_activation( bool throw_monitor_exception, bool install_monitor_exception, bool notify_jvmdi) { - // Note: Registers x13 xmm0 may be in use for the + // Note: Registers x13 may be in use for the // result check if synchronized method Label unlocked, unlock, no_unlock; @@ -773,7 +766,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) Label slow_case; - // Load object pointer into obj_reg %c_rarg3 + // Load object pointer into obj_reg c_rarg3 ld(obj_reg, Address(lock_reg, obj_offset)); if (UseBiasedLocking) { @@ -791,7 +784,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) "displached header must be first word in BasicObjectLock"); if (PrintBiasedLockingStatistics) { - Label fast, fail; + Label fail, fast; cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); bind(fast); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), @@ -804,13 +797,13 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) // Test if the oopMark is an obvious stack pointer, i.e., // 1) (mark & 7) == 0, and - // 2) rsp <= mark < mark + os::pagesize() + // 2) sp <= mark < mark + os::pagesize() // // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (7 - os::vm_page_size())), + // expression: ((mark - sp) & (7 - os::vm_page_size())), // assuming both stack pointer and pagesize have their // least significant 3 bits clear. - // NOTE: the oopMark is in swap_reg %x10 as the result of cmpxchg + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg sub(swap_reg, swap_reg, sp); li(t0, (int64_t)(7 - os::vm_page_size())); andr(swap_reg, swap_reg, t0); @@ -853,7 +846,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); if (UseHeavyMonitors) { - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); } else { Label done; @@ -864,10 +859,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) save_bcp(); // Save in case of exception // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %x10 + // structure Store the BasicLock address into x10 la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); - // Load oop into obj_reg(%c_rarg3) + // Load oop into obj_reg(c_rarg3) ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // Free entry @@ -889,7 +884,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Call the runtime routine for slow case. sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); bind(done); @@ -1473,7 +1470,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, if (ProfileInterpreter) { Label profile_continue; - // if no method data exists, go to profile_continue. + // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // Build the base (index * per_case_size_in_bytes()) + @@ -1651,8 +1648,8 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md xorr(obj, obj, t0); andi(t0, obj, TypeEntries::type_klass_mask); beqz(t0, next); // klass seen before, nothing to - // do. The unknown bit may have been - // set already but no need to check. + // do. The unknown bit may have been + // set already but no need to check. andi(t0, obj, TypeEntries::type_unknown); bnez(t0, next); @@ -1793,8 +1790,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. 
assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); - slli(tmp, tmp, exact_log2(DataLayout::cell_size)); - add(mdp, mdp, tmp); + shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); } sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); } else { @@ -1833,7 +1829,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, beq(t0, tmp, do_profile); get_method(tmp); lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); - li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); + li(t1, vmIntrinsics::_compiledLambdaForm); bne(t0, t1, profile_continue); bind(do_profile); } @@ -1876,22 +1872,17 @@ void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register t add(t0, mdp, off_base); add(t1, mdp, type_base); - - slli(tmp2, tmp1, per_arg_scale); - add(tmp2, tmp2, t0); + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); // load offset on the stack from the slot for this parameter ld(tmp2, Address(tmp2, 0)); neg(tmp2, tmp2); // read the parameter from the local area - - slli(tmp2, tmp2, Interpreter::logStackElementSize); - add(tmp2, tmp2, xlocals); + shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ld(tmp2, Address(tmp2, 0)); // profile the parameter - slli(t0, tmp1, per_arg_scale); - add(t1, t0, t1); + shadd(t1, tmp1, t1, t0, per_arg_scale); Address arg_type(t1, 0); profile_obj_type(tmp2, arg_type, tmp3); diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp index 21bb67efbb6..4126e8ee70f 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // Load cpool->resolved_klass_at(index). void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); - void load_resolved_method_at_index(int byte_no, Register method, Register cache); - void pop_ptr(Register r = x10); void pop_i(Register r = x10); void pop_l(Register r = x10); @@ -148,7 +146,7 @@ class InterpreterMacroAssembler: public MacroAssembler { void load_ptr(int n, Register val); void store_ptr(int n, Register val); -// Load float value from 'address'. The value is loaded onto the FPU register v0. + // Load float value from 'address'. The value is loaded onto the FPU register v0. void load_float(Address src); void load_double(Address src); diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp index 4ef603451c0..776b0787238 100644 --- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -45,94 +45,99 @@ Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } +Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + return g_INTArgReg[++_num_reg_int_args]; + } + return noreg; +} + +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; + } else { + return fnoreg; + } +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { + int ret = _stack_offset; + _stack_offset += wordSize; + return ret; +} + InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { _masm = new MacroAssembler(buffer); // allocate on resourse area by default - _num_int_args = (method->is_static() ? 1 : 0); - _num_fp_args = 0; + _num_reg_int_args = (method->is_static() ? 1 : 0); + _num_reg_fp_args = 0; _stack_offset = 0; } void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { const Address src(from(), Interpreter::local_offset_in_bytes(offset())); - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - __ lw(g_INTArgReg[++_num_int_args], src); + Register reg = next_gpr(); + if (reg != noreg) { + __ lw(reg, src); } else { __ lw(x10, src); - __ sw(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + __ sw(x10, Address(to(), next_stack_offset())); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - __ ld(g_INTArgReg[++_num_int_args], src); - } else { + Register reg = next_gpr(); + if (reg != noreg) { + __ ld(reg, src); + } else { __ ld(x10, src); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + __ sd(x10, Address(to(), next_stack_offset())); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { const Address src(from(), Interpreter::local_offset_in_bytes(offset())); - if (_num_fp_args < Argument::n_float_register_parameters_c) { - // to c_farg - __ flw(g_FPArgReg[_num_fp_args++], src); - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to c_rarg - __ lwu(g_INTArgReg[++_num_int_args], src); + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ flw(reg, src); } else { - // to stack - __ lwu(x10, src); - __ sw(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_fp_args++; + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_int(); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); - if (_num_fp_args < Argument::n_float_register_parameters_c) { - // to c_farg - __ fld(g_FPArgReg[_num_fp_args++], src); - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to c_rarg - __ ld(g_INTArgReg[++_num_int_args], src); + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ fld(reg, src); } 
else { - // to stack - __ ld(x10, src); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_fp_args++; + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_long(); } } void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { - - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // to reg - if (_num_int_args == 0) { - assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); - __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); - _num_int_args++; - } else { + Register reg = next_gpr(); + if (reg == c_rarg1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + } else if (reg != noreg) { // c_rarg2-c_rarg7 __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); - __ mv(g_INTArgReg[++_num_int_args], 0); //_num_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... + __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... __ ld(temp(), x10); Label L; __ beqz(temp(), L); - __ mv(g_INTArgReg[_num_int_args], x10); + __ mv(reg, x10); __ bind(L); - } } else { //to stack __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); @@ -141,9 +146,8 @@ void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { __ bnez(temp(), L); __ mv(x10, zr); __ bind(L); - __ sd(x10, Address(to(), _stack_offset)); - _stack_offset += wordSize; - _num_int_args++; + assert(sizeof(jobject) == wordSize, ""); + __ sd(x10, Address(to(), next_stack_offset())); } } @@ -172,84 +176,79 @@ class SlowSignatureHandler intptr_t* _int_args; intptr_t* _fp_args; intptr_t* _fp_identifiers; - unsigned int _num_int_args; - unsigned int _num_fp_args; + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; - virtual void pass_int() - { - jint from_obj = *(jint *)(_from + Interpreter::local_offset_in_bytes(0)); + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); _from -= Interpreter::stackElementSize; + return from_addr; + } - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_int_args++; + intptr_t* double_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + return from_addr; + } + + int pass_gpr(intptr_t value) { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + *_int_args++ = value; + return _num_reg_int_args++; } + return -1; } - virtual void pass_long() - { - intptr_t from_obj = *(intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); - _from -= 2*Interpreter::stackElementSize; - - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_int_args++; + int pass_fpr(intptr_t value) { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + *_fp_args++ = value; + return _num_reg_fp_args++; } + return -1; } - virtual void pass_object() - { - intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; + void pass_stack(intptr_t value) { + *_to++ = value; + } - if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = (*from_addr == 0) ? 
NULL : (intptr_t)from_addr; - _num_int_args++; - } else { - *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; - _num_int_args++; + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); } } - virtual void pass_float() - { - jint from_obj = *(jint*)(_from + Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } - if (_num_fp_args < Argument::n_float_register_parameters_c) { - *_fp_args++ = from_obj; - _num_fp_args++; - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_fp_args++; + virtual void pass_object() { + intptr_t* addr = single_slot_addr(); + intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; + if (pass_gpr(value) < 0) { + pass_stack(value); } } - virtual void pass_double() - { - intptr_t from_obj = *(intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); - _from -= 2*Interpreter::stackElementSize; - - if (_num_fp_args < Argument::n_float_register_parameters_c) { - *_fp_args++ = from_obj; - *_fp_identifiers |= (1ull << _num_fp_args); // mark as double - _num_fp_args++; - } else if (_num_int_args < Argument::n_int_register_parameters_c - 1) { - // ld/st from_obj as integer, no need to mark _fp_identifiers - *_int_args++ = from_obj; - _num_int_args++; - } else { - *_to++ = from_obj; - _num_fp_args++; + virtual void pass_float() { + jint value = *(jint*) single_slot_addr(); + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { + *_fp_identifiers |= (1ull << arg); // mark as double + } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack + pass_stack(value); } } @@ -261,12 +260,13 @@ class SlowSignatureHandler _to = to; _int_args = to - (method->is_static() ? 16 : 17); - _fp_args = to - 8; + _fp_args = to - 8; _fp_identifiers = to - 9; *(int*) _fp_identifiers = 0; - _num_int_args = (method->is_static() ? 1 : 0); - _num_fp_args = 0; + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; } + ~SlowSignatureHandler() { _from = NULL; diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp index d56896fefd8..05df63ba2ae 100644 --- a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -35,8 +35,8 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { private: MacroAssembler* _masm; - unsigned int _num_fp_args; - unsigned int _num_int_args; + unsigned int _num_reg_fp_args; + unsigned int _num_reg_int_args; int _stack_offset; void pass_int(); @@ -45,6 +45,10 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { void pass_double(); void pass_object(); + Register next_gpr(); + FloatRegister next_fpr(); + int next_stack_offset(); + public: // Creation SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp index 76610084f75..5a0c9b812fc 100644 --- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,7 +29,7 @@ private: // FP value associated with _last_Java_sp: - intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to public: // Each arch must define reset, save, restore @@ -80,10 +79,11 @@ public: - void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } - intptr_t* last_Java_fp(void) { return _last_Java_fp; } // Assert (last_Java_sp == NULL || fp == NULL) - void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } + void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp index a0c5b7be125..f6e7351c4fc 100644 --- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { // An even value means there are no ongoing safepoint operations __ andi(t0, rcounter, 1); __ bnez(t0, slow); - - if (JvmtiExport::can_post_field_access()) { - // Using barrier to order wrt. JVMTI check and load of result. - __ membar(MacroAssembler::LoadLoad); - - // Check to see if a field access watch has been set before we - // take the fast path. - int32_t offset2; - __ la_patchable(result, - ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), - offset2); - __ lwu(result, Address(result, offset2)); - __ bnez(result, slow); - - __ mv(robj, c_rarg1); - } else { - // Using address dependency to order wrt. load of result. 
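Stepping back to the interpreterRT_riscv.cpp rework above: pass_float()/pass_double() now simply fall through to pass_int()/pass_long() once the FP argument registers are exhausted, which is the LP64D rule that a floating-point argument with no free FP register is passed by the integer convention (register first, then stack). A minimal standalone sketch of that routing follows; the register counts and names are assumptions for illustration, not HotSpot's Argument constants (the real generator also holds one integer register back, hence the n_int_register_parameters_c - 1 bound in next_gpr()).

#include <cstdio>

// Sketch only: route native arguments the way the signature handlers above do.
enum { kIntRegs = 8, kFpRegs = 8 };   // assumed a0-a7 / fa0-fa7

struct Dispatcher {
  int gpr = 0, fpr = 0, stack = 0;

  void integer(const char* what) {
    if (gpr < kIntRegs) std::printf("%s -> a%d\n", what, gpr++);
    else                std::printf("%s -> stack slot %d\n", what, stack++);
  }
  void fp(const char* what) {
    if (fpr < kFpRegs)  std::printf("%s -> fa%d\n", what, fpr++);
    else                integer(what);   // fall back to the integer convention
  }
};

int main() {
  Dispatcher d;
  for (int i = 0; i < 10; i++) d.fp("double");    // 8 land in fa0-fa7, 2 spill to a0/a1
  for (int i = 0; i < 10; i++) d.integer("long"); // a2-a7, then the native stack
  return 0;
}

The same ordering is what lets SlowSignatureHandler express all five pass_* methods through the three helpers pass_fpr(), pass_gpr() and pass_stack().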
- __ xorr(robj, c_rarg1, rcounter); - __ xorr(robj, robj, rcounter); // obj, since - // robj ^ rcounter ^ rcounter == robj - // robj is address dependent on rcounter. - } + __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since + // robj ^ rcounter ^ rcounter == robj + // robj is address dependent on rcounter. // Both robj and t0 are clobbered by try_resolve_jobject_in_native. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { default: ShouldNotReachHere(); } - // Using acquire: Order JVMTI check and load of result wrt. succeeding check - // (LoadStore for volatile field). - __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - + __ xorr(rcounter_addr, rcounter_addr, result); + __ xorr(rcounter_addr, rcounter_addr, result); __ lw(t0, safepoint_counter_addr); __ bne(rcounter, t0, slow); @@ -172,7 +152,6 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { int32_t tmp_offset = 0; __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); __ jalr(x1, t0, tmp_offset); - __ ifence(); __ leave(); __ ret(); } diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp index a0c0cebf41a..df3c0267eea 100644 --- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,7 +27,6 @@ #define CPU_RISCV_JNITYPES_RISCV_HPP #include "jni.h" -#include "memory/allocation.hpp" #include "oops/oop.hpp" // This file holds platform-dependent routines used to write primitive jni @@ -67,9 +65,9 @@ class JNITypes : private AllStatic { } // Oops are stored in native format in one JavaCallArgument slot at *to. - static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } - static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } - static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } // Floats are stored in native format in one JavaCallArgument slot at *to. static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 3406d29ed23..f35f3a86797 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,6 +49,7 @@ #include "runtime/thread.hpp" #ifdef COMPILER2 #include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" #include "opto/node.hpp" #include "opto/output.hpp" #endif @@ -88,8 +89,9 @@ static void pass_arg3(MacroAssembler* masm, Register arg) { } } -void MacroAssembler::align(int modulus) { - while (offset() % modulus != 0) { nop(); } +void MacroAssembler::align(int modulus, int extra_offset) { + CompressibleRegion cr(this); + while ((offset() + extra_offset) % modulus != 0) { nop(); } } void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { @@ -190,6 +192,22 @@ void MacroAssembler::call_VM(Register oop_result, void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} void MacroAssembler::check_and_handle_popframe(Register java_thread) {} +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + + // load indirectly to solve generation ordering problem + ld(tmp, ExternalAddress((address) delayed_value_addr)); + + if (offset != 0) + add(tmp, tmp, offset); + + return RegisterOrConstant(tmp); +} + // Calls to C land // // When entering C land, the fp, & esp of the last Java frame have to be recorded @@ -198,7 +216,7 @@ void MacroAssembler::check_and_handle_popframe(Register java_thread) {} void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, - Register temp) { + Register tmp) { if (last_java_pc->is_valid()) { sd(last_java_pc, Address(xthread, @@ -208,8 +226,8 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, // determine last_java_sp register if (last_java_sp == sp) { - mv(temp, sp); - last_java_sp = temp; + mv(tmp, sp); + last_java_sp = tmp; } else if (!last_java_sp->is_valid()) { last_java_sp = esp; } @@ -225,25 +243,49 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, - Register temp) { + Register tmp) { assert(last_java_pc != NULL, "must provide a valid PC"); - la(temp, last_java_pc); - sd(temp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + la(tmp, last_java_pc); + sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - set_last_Java_frame(last_java_sp, last_java_fp, noreg, temp); + set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); } void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &L, - Register temp) { + Register tmp) { if (L.is_bound()) { - set_last_Java_frame(last_java_sp, last_java_fp, target(L), temp); + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. 
+// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. +// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + membar(MacroAssembler::AnyAny); + ld(t1, Address(xthread, Thread::polling_page_offset())); + membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + andi(t0, t1, SafepointMechanism::poll_bit()); + bnez(t0, slow_path); + } else { + safepoint_poll(slow_path); } } @@ -344,14 +386,13 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } BLOCK_COMMENT("verify_oop {"); - push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); mv(c_rarg0, reg); // c_rarg0 : x10 - if(b != NULL) { - li(t0, (uintptr_t)(address)b); - } else { - ShouldNotReachHere(); - } + // The length of the instruction sequence emitted should be independent + // of the values of the local char buffer address so that the size of mach + // nodes for scratch emit and normal emit matches. + mv(t0, (address)b); // call indirectly to solve generation ordering problem int32_t offset = 0; @@ -359,7 +400,7 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { ld(t1, Address(t1, offset)); jalr(t1); - pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); BLOCK_COMMENT("} verify_oop"); } @@ -378,7 +419,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } BLOCK_COMMENT("verify_oop_addr {"); - push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); if (addr.uses(sp)) { la(x10, addr); @@ -386,11 +427,11 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } else { ld(x10, addr); } - if(b != NULL) { - li(t0, (uintptr_t)(address)b); - } else { - ShouldNotReachHere(); - } + + // The length of the instruction sequence emitted should be independent + // of the values of the local char buffer address so that the size of mach + // nodes for scratch emit and normal emit matches. 
+ mv(t0, (address)b); // call indirectly to solve generation ordering problem int32_t offset = 0; @@ -398,7 +439,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ld(t1, Address(t1, offset)); jalr(t1); - pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp); + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); BLOCK_COMMENT("} verify_oop_addr"); } @@ -416,8 +457,7 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, return Address(esp, arg_slot.as_constant() * stackElementSize + offset); } else { assert_different_registers(t0, arg_slot.as_register()); - slli(t0, arg_slot.as_register(), exact_log2(stackElementSize)); - add(t0, esp, t0); + shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); return Address(t0, offset); } } @@ -480,12 +520,8 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) tty->print_cr("x31 = 0x%016lx", regs[31]); BREAKPOINT; } - ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); - } else { - ttyLocker ttyl; - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); - assert(false, "DEBUG MESSAGE: %s", msg); } + fatal("DEBUG MESSAGE: %s", msg); } void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { @@ -512,12 +548,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t void MacroAssembler::stop(const char* msg) { address ip = pc(); pusha(); - if(msg != NULL && ip != NULL) { - li(c_rarg0, (uintptr_t)(address)msg); - li(c_rarg1, (uintptr_t)(address)ip); - } else { - ShouldNotReachHere(); - } + // The length of the instruction sequence emitted should be independent + // of the values of msg and ip so that the size of mach nodes for scratch + // emit and normal emit matches. + mv(c_rarg0, (address)msg); + mv(c_rarg1, (address)ip); mv(c_rarg2, sp); mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); jalr(c_rarg3); @@ -540,7 +575,6 @@ void MacroAssembler::emit_static_call_stub() { // exact layout of this stub. ifence(); - mov_metadata(xmethod, (Metadata*)NULL); // Jump to the entry point of the i2c stub. 
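The recurring comment above (the emitted sequence length must be independent of the embedded values) is what keeps C2's scratch emit and the real emit producing MachNodes of the same size. A toy illustration of the failure mode; the instruction counts below are invented for the example, not the assembler's actual li/movptr encodings:

#include <cstdint>
#include <cstdio>

// Value-dependent length: a shortest-form load-immediate needs fewer
// instructions for small constants (counts are illustrative).
int li_len(std::uint64_t imm) {
  if (imm < (1u << 11))   return 1;   // fits a single addi
  if (imm < (1ull << 31)) return 2;   // lui + addi
  return 6;                           // long 64-bit constant
}

// Fixed-length movptr-style materialization: same size for every value.
int movptr_len(std::uint64_t) { return 6; }

int main() {
  std::uint64_t scratch_addr = 0x1000;            // placeholder seen during scratch emit
  std::uint64_t real_addr    = 0x7f3a12345678ull; // address seen at the real emit
  std::printf("li:     %d vs %d instructions\n", li_len(scratch_addr), li_len(real_addr));
  std::printf("movptr: %d vs %d instructions\n", movptr_len(scratch_addr), movptr_len(real_addr));
  return 0;
}

With a value-dependent encoding the two emits could disagree on code size; mv(t0, (address)b) and mv(c_rarg0, (address)msg) avoid that by construction.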
@@ -548,11 +582,11 @@ void MacroAssembler::emit_static_call_stub() { movptr_with_offset(t0, 0, offset); jalr(x0, t0, offset); } + void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments, Label *retaddr) { call_native_base(entry_point, retaddr); - ifence(); } void MacroAssembler::call_native(address entry_point, Register arg_0) { @@ -658,6 +692,10 @@ void MacroAssembler::sext_w(Register Rd, Register Rs) { addiw(Rd, Rs, 0); } +void MacroAssembler::zext_b(Register Rd, Register Rs) { + andi(Rd, Rs, 0xFF); +} + void MacroAssembler::seqz(Register Rd, Register Rs) { sltiu(Rd, Rs, 1); } @@ -702,6 +740,18 @@ void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { fsgnjn_d(Rd, Rs, Rs); } +void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { + vmnand_mm(vd, vs, vs); +} + +void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { + vnsrl_wx(vd, vs, x0, vm); +} + +void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { + vfsgnjn_vv(vd, vs, vs); +} + void MacroAssembler::la(Register Rd, const address &dest) { int64_t offset = dest - pc(); if (is_offset_in_range(offset, 32)) { @@ -717,7 +767,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { code_section()->relocate(inst_mark(), adr.rspec()); relocInfo::relocType rtype = adr.rspec().reloc()->type(); - switch(adr.getMode()) { + switch (adr.getMode()) { case Address::literal: { if (rtype == relocInfo::none) { li(Rd, (intptr_t)(adr.target())); @@ -726,7 +776,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { } break; } - case Address::base_plus_offset:{ + case Address::base_plus_offset: { int32_t offset = 0; baseOffset(Rd, adr, offset); addi(Rd, Rd, offset); @@ -741,13 +791,13 @@ void MacroAssembler::la(Register Rd, Label &label) { la(Rd, target(label)); } -#define INSN(NAME) \ - void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ - NAME(Rs, zr, dest); \ - } \ - void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ - NAME(Rs, zr, l, is_far); \ - } \ +#define INSN(NAME) \ + void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ + NAME(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ + NAME(Rs, zr, l, is_far); \ + } \ INSN(beq); INSN(bne); @@ -760,14 +810,14 @@ void MacroAssembler::la(Register Rd, Label &label) { // Float compare branch instructions -#define INSN(NAME, FLOATCMP, BRANCH) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ - FLOATCMP##_s(t0, Rs1, Rs2); \ - BRANCH(t0, l, is_far); \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ - FLOATCMP##_d(t0, Rs1, Rs2); \ - BRANCH(t0, l, is_far); \ +#define INSN(NAME, FLOATCMP, BRANCH) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_s(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_d(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ } INSN(beq, feq, bnez); @@ -776,30 +826,30 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN -#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - if(is_unordered) { \ - /* jump if 
either source is NaN or condition is expected */ \ - FLOATCMP2##_s(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ - } else { \ - /* jump if no NaN in source and condition is expected */ \ - FLOATCMP1##_s(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ - } \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - if(is_unordered) { \ - /* jump if either source is NaN or condition is expected */ \ - FLOATCMP2##_d(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ - } else { \ - /* jump if no NaN in source and condition is expected */ \ - FLOATCMP1##_d(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ - } \ +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ } INSN(ble, fle, flt); @@ -807,14 +857,14 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN -#define INSN(NAME, CMP) \ - void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ - } \ - void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ - bool is_far, bool is_unordered) { \ - double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ +#define INSN(NAME, CMP) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ } INSN(bgt, blt); @@ -904,102 +954,6 @@ void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { #undef INSN -#ifdef COMPILER2 -typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, - bool is_far, bool is_unordered); - -static conditional_branch_insn conditional_branches[] = -{ - /* SHORT branches */ - (conditional_branch_insn)&Assembler::beq, - (conditional_branch_insn)&Assembler::bgt, - NULL, // BoolTest::overflow - (conditional_branch_insn)&Assembler::blt, - (conditional_branch_insn)&Assembler::bne, - (conditional_branch_insn)&Assembler::ble, - NULL, // BoolTest::no_overflow - (conditional_branch_insn)&Assembler::bge, - - /* UNSIGNED branches */ - (conditional_branch_insn)&Assembler::beq, - (conditional_branch_insn)&Assembler::bgtu, - NULL, - (conditional_branch_insn)&Assembler::bltu, - (conditional_branch_insn)&Assembler::bne, - (conditional_branch_insn)&Assembler::bleu, - NULL, - (conditional_branch_insn)&Assembler::bgeu -}; - -static 
float_conditional_branch_insn float_conditional_branches[] = -{ - /* FLOAT SHORT branches */ - (float_conditional_branch_insn)&MacroAssembler::float_beq, - (float_conditional_branch_insn)&MacroAssembler::float_bgt, - NULL, // BoolTest::overflow - (float_conditional_branch_insn)&MacroAssembler::float_blt, - (float_conditional_branch_insn)&MacroAssembler::float_bne, - (float_conditional_branch_insn)&MacroAssembler::float_ble, - NULL, // BoolTest::no_overflow - (float_conditional_branch_insn)&MacroAssembler::float_bge, - - /* DOUBLE SHORT branches */ - (float_conditional_branch_insn)&MacroAssembler::double_beq, - (float_conditional_branch_insn)&MacroAssembler::double_bgt, - NULL, - (float_conditional_branch_insn)&MacroAssembler::double_blt, - (float_conditional_branch_insn)&MacroAssembler::double_bne, - (float_conditional_branch_insn)&MacroAssembler::double_ble, - NULL, - (float_conditional_branch_insn)&MacroAssembler::double_bge -}; - -void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { - assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), - "invalid conditional branch index"); - (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -} - -// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { - assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), - "invalid float conditional branch index"); - int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); - (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, - (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); -} - -void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { - switch (cmpFlag) { - case BoolTest::eq: - case BoolTest::le: - beqz(op1, L, is_far); - break; - case BoolTest::ne: - case BoolTest::gt: - bnez(op1, L, is_far); - break; - default: - ShouldNotReachHere(); - } -} - -void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { - switch (cmpFlag) { - case BoolTest::eq: - beqz(op1, L, is_far); - break; - case BoolTest::ne: - bnez(op1, L, is_far); - break; - default: - ShouldNotReachHere(); - } -} -#endif // COMPILER2 - void MacroAssembler::push_reg(Register Rs) { addi(esp, esp, 0 - wordSize); @@ -1013,22 +967,14 @@ void MacroAssembler::pop_reg(Register Rd) } int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { - DEBUG_ONLY(int words_pushed = 0;) - int count = 0; - // Sp is x2, and zr is x0, which should not be pushed. - // If the number of registers is odd, zr is used for stack alignment.Otherwise, it will be ignored. 
- bitset &= ~ (1U << 2); - bitset |= 0x1; - // Scan bitset to accumulate register pairs - for (int reg = 31; reg >= 0; reg --) { + for (int reg = 31; reg >= 0; reg--) { if ((1U << 31) & bitset) { regs[count++] = reg; } bitset <<= 1; } - count &= ~1; // Only push an even number of regs return count; } @@ -1036,15 +982,18 @@ int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { // Return the number of words pushed int MacroAssembler::push_reg(unsigned int bitset, Register stack) { DEBUG_ONLY(int words_pushed = 0;) + CompressibleRegion cr(this); unsigned char regs[32]; int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; if (count) { - addi(stack, stack, - count * wordSize); + addi(stack, stack, - count * wordSize - offset); } for (int i = count - 1; i >= 0; i--) { - sd(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize)); + sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); DEBUG_ONLY(words_pushed ++;) } @@ -1055,42 +1004,33 @@ int MacroAssembler::push_reg(unsigned int bitset, Register stack) { int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { DEBUG_ONLY(int words_popped = 0;) + CompressibleRegion cr(this); unsigned char regs[32]; int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; for (int i = count - 1; i >= 0; i--) { - ld(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize)); + ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); DEBUG_ONLY(words_popped ++;) } if (count) { - addi(stack, stack, count * wordSize); + addi(stack, stack, count * wordSize + offset); } assert(words_popped == count, "oops, popped != count"); return count; } -int MacroAssembler::bitset_to_fregs(unsigned int bitset, unsigned char* regs) { - int count = 0; - // Scan bitset to accumulate register pairs - for (int reg = 31; reg >= 0; reg--) { - if ((1U << 31) & bitset) { - regs[count++] = reg; - } - bitset <<= 1; - } - - return count; -} - // Push float registers in the bitset, except sp. // Return the number of heapwords pushed. int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); int words_pushed = 0; unsigned char regs[32]; - int count = bitset_to_fregs(bitset, regs); + int count = bitset_to_regs(bitset, regs); int push_slots = count + (count & 1); if (count) { @@ -1107,9 +1047,10 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { } int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); int words_popped = 0; unsigned char regs[32]; - int count = bitset_to_fregs(bitset, regs); + int count = bitset_to_regs(bitset, regs); int pop_slots = count + (count & 1); for (int i = count - 1; i >= 0; i--) { @@ -1125,18 +1066,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { return count; } -void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { - vmnand_mm(vd, vs, vs); -} - -void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { - vnsrl_wx(vd, vs, x0, vm); -} - -void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { - vfsgnjn_vv(vd, vs, vs); -} - // CSky specific ldd/lwd/lwud/swd/sdd to merge 2 load or 2 store instructions // Checks whether current and previous load/store can be merged. // Returns true if it can be merged, else false. 
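One detail worth spelling out from the push_reg()/pop_reg() hunks above: when an odd number of registers is saved, one extra word is reserved so the stack pointer keeps its 16-byte alignment. A small sketch of that arithmetic, assuming wordSize is 8 as on RV64:

#include <cassert>
#include <cstdio>

// Illustrative only: total stack adjustment for pushing 'count' registers,
// padded by one slot when the count is odd ("reserve one slot to align for
// odd count" in the hunk above).
int push_bytes(int count, int word_size = 8) {
  int pad = (count % 2 == 0) ? 0 : word_size;
  return count * word_size + pad;
}

int main() {
  for (int count = 1; count <= 5; count++) {
    int bytes = push_bytes(count);
    assert(bytes % 16 == 0);                     // sp stays 16-byte aligned
    std::printf("%d regs -> sp -= %d bytes\n", count, bytes);
  }
  return 0;
}

The saved registers themselves are then stored starting above the pad, which matches the (count - 1 - i) * wordSize + offset addressing in the hunk.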
@@ -1331,6 +1260,7 @@ void MacroAssembler::sw(Register Rw, const Address& adr) { } void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); // Push integer registers x7, x10-x17, x28-x31. push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); @@ -1345,6 +1275,7 @@ void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { } void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); int offset = 0; for (int i = 0; i < 32; i++) { if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -1356,18 +1287,22 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); } -// Push all the integer registers, except zr(x0) & sp(x2). +// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). void MacroAssembler::pusha() { - push_reg(0xfffffffa, sp); + CompressibleRegion cr(this); + push_reg(0xffffffe2, sp); } +// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). void MacroAssembler::popa() { - pop_reg(0xfffffffa, sp); + CompressibleRegion cr(this); + pop_reg(0xffffffe2, sp); } void MacroAssembler::push_CPU_state() { - // integer registers, except zr(x0) & ra(x1) & sp(x2) - push_reg(0xfffffff8, sp); + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); // float registers addi(sp, sp, - 32 * wordSize); @@ -1377,14 +1312,16 @@ void MacroAssembler::push_CPU_state() { } void MacroAssembler::pop_CPU_state() { + CompressibleRegion cr(this); + // float registers for (int i = 0; i < 32; i++) { fld(as_FloatRegister(i), Address(sp, i * wordSize)); } addi(sp, sp, 32 * wordSize); - // integer registers, except zr(x0) & ra(x1) & sp(x2) - pop_reg(0xfffffff8, sp); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + pop_reg(0xffffffe0, sp); } static int patch_offset_in_jal(address branch, int64_t offset) { @@ -1534,10 +1471,14 @@ int MacroAssembler::pd_patch_instruction_size(address branch, address target) { int64_t imm = (intptr_t)target; return patch_imm_in_li32(branch, (int32_t)imm); } else { - tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); +#ifdef ASSERT + tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", + *(unsigned*)branch, p2i(branch)); + Disassembler::decode(branch - 16, branch + 16); +#endif ShouldNotReachHere(); + return -1; } - return -1; } address MacroAssembler::target_addr_for_insn(address insn_addr) { @@ -1567,7 +1508,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { // instruction. 
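As a sanity check on the pusha()/popa() and push_CPU_state() mask changes above, the new constants can be decoded bit by bit (bit i stands for integer register x<i>); this is an illustrative check, not HotSpot code:

#include <cstdio>

void show_excluded(const char* name, unsigned mask) {
  std::printf("%s excludes:", name);
  for (int i = 0; i < 32; i++) {
    if ((mask & (1u << i)) == 0) std::printf(" x%d", i);
  }
  std::printf("\n");
}

int main() {
  show_excluded("pusha/popa     0xffffffe2", 0xffffffe2u);  // x0 x2 x3 x4
  show_excluded("push_CPU_state 0xffffffe0", 0xffffffe0u);  // x0 x1 x2 x3 x4
  return 0;
}

This matches the updated comments: zr(x0), sp(x2), gp(x3) and tp(x4) are never pushed, and push_CPU_state() additionally leaves out ra(x1).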
if (NativeInstruction::is_li32_at(insn_addr)) { // Move narrow OOP - narrowOop n = CompressedOops::encode(cast_to_oop(o)); + narrowOop n = CompressedOops::encode((oop)o); return patch_imm_in_li32(insn_addr, (int32_t)n); } else if (NativeInstruction::is_movptr_at(insn_addr)) { // Move wide OOP @@ -1580,11 +1521,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { if (Universe::is_fully_initialized()) { - if (Universe::narrow_ptrs_base() == NULL) { - li(xheapbase, 0); - } else { - mv(xheapbase, Universe::narrow_ptrs_base()); - } + mv(xheapbase, Universe::narrow_ptrs_base()); } else { int32_t offset = 0; la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); @@ -1593,18 +1530,6 @@ void MacroAssembler::reinit_heapbase() { } } -void MacroAssembler::mv(Register Rd, int64_t imm64) { - li(Rd, imm64); -} - -void MacroAssembler::mv(Register Rd, int imm) { - mv(Rd, (int64_t)imm); -} - -void MacroAssembler::mvw(Register Rd, int32_t imm32) { - mv(Rd, imm32); -} - void MacroAssembler::mv(Register Rd, Address dest) { assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); code_section()->relocate(pc(), dest.rspec()); @@ -1612,7 +1537,7 @@ void MacroAssembler::mv(Register Rd, Address dest) { } void MacroAssembler::mv(Register Rd, address addr) { - // Here in case of use with relocation, use fix length instruciton + // Here in case of use with relocation, use fix length instruction // movptr instead of li movptr(Rd, addr); } @@ -1691,136 +1616,164 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in } } -void MacroAssembler::reverseb16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // This method is only used for grev16 - // Rd = Rs[47:0] Rs[55:48] Rs[63:56] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1); - srli(Rtmp1, Rs, 48); - andi(Rtmp2, Rtmp1, 0xff); - slli(Rtmp2, Rtmp2, 8); - srli(Rtmp1, Rtmp1, 8); - orr(Rtmp1, Rtmp1, Rtmp2); - slli(Rd, Rs, 16); - orr(Rd, Rd, Rtmp1); -} - -void MacroAssembler::reverseh32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // This method is only used for grev32 - // Rd[63:0] = Rs[31:0] Rs[47:32] Rs[63:48] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1); - srli(Rtmp1, Rs, 32); - slli(Rtmp2, Rtmp1, 48); - srli(Rtmp2, Rtmp2, 32); - srli(Rtmp1, Rtmp1, 16); - orr(Rtmp1, Rtmp1, Rtmp2); - slli(Rd, Rs, 32); - orr(Rd, Rd, Rtmp1); -} - -void MacroAssembler::grevh(Register Rd, Register Rs, Register Rtmp) { - // Reverse bytes in half-word - // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp); - assert_different_registers(Rd, Rtmp); - srli(Rtmp, Rs, 8); - andi(Rtmp, Rtmp, 0xFF); +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); slli(Rd, Rs, 56); srai(Rd, Rd, 48); // sign-extend - orr(Rd, Rd, Rtmp); + orr(Rd, Rd, tmp); } -void MacroAssembler::grevhu(Register Rd, Register Rs, Register Rtmp) { - // Reverse bytes in half-word - // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp); - 
assert_different_registers(Rd, Rtmp); - srli(Rtmp, Rs, 8); - andi(Rtmp, Rtmp, 0xFF); - andi(Rd, Rs, 0xFF); - slli(Rd, Rd, 8); - orr(Rd, Rd, Rtmp); -} - -void MacroAssembler::grev16w(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (32bit) - // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - srli(Rtmp2, Rs, 16); - grevh(Rtmp2, Rtmp2, Rtmp1); - grevhu(Rd, Rs, Rtmp1); - slli(Rtmp2, Rtmp2, 16); - orr(Rd, Rd, Rtmp2); -} - -void MacroAssembler::grev16wu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (32bit) - // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - srli(Rtmp2, Rs, 16); - grevhu(Rtmp2, Rtmp2, Rtmp1); - grevhu(Rd, Rs, Rtmp1); - slli(Rtmp2, Rtmp2, 16); - orr(Rd, Rd, Rtmp2); -} - -void MacroAssembler::grevw(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (32bit) - // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16wu(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 48); - srai(Rtmp2, Rtmp2, 32); // sign-extend +// reverse bytes in lower word and sign-extend +// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) +void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_w_u(Rd, Rs, tmp1, tmp2); + slli(tmp2, Rd, 48); + srai(tmp2, tmp2, 32); // sign-extend srli(Rd, Rd, 16); - orr(Rd, Rd, Rtmp2); + orr(Rd, Rd, tmp2); } -void MacroAssembler::grevwu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (32bit) - // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (zero-extend to 64 bits) - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16wu(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 48); - srli(Rtmp2, Rtmp2, 32); - srli(Rd, Rd, 16); - orr(Rd, Rd, Rtmp2); +// reverse bytes in halfword in lower 16 bits and zero-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srli(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + andi(Rd, Rs, 0xFF); + slli(Rd, Rd, 8); + orr(Rd, Rd, tmp); } -void MacroAssembler::grev16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in half-word (64bit) - // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - reverseb16(Rd, Rs, Rtmp1, Rtmp2); +// reverse bytes in halfwords in lower 32 bits and zero-extend +// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + return; + } + assert_different_registers(Rs, tmp1, tmp2); 
+ assert_different_registers(Rd, tmp1, tmp2); + srli(tmp2, Rs, 16); + revb_h_h_u(tmp2, tmp2, tmp1); + revb_h_h_u(Rd, Rs, tmp1); + slli(tmp2, tmp2, 16); + orr(Rd, Rd, tmp2); +} + +// This method is only used for revb_h +// Rd = Rs[47:0] Rs[55:48] Rs[63:56] +void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1); + srli(tmp1, Rs, 48); + andi(tmp2, tmp1, 0xFF); + slli(tmp2, tmp2, 8); + srli(tmp1, tmp1, 8); + orr(tmp1, tmp1, tmp2); + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} + +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + assert_different_registers(Rs, tmp1); + assert_different_registers(Rd, tmp1); + rev8(Rd, Rs); + zext_w(tmp1, Rd); + roriw(tmp1, tmp1, 16); + slli(tmp1, tmp1, 32); + srli(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + orr(Rd, Rd, tmp1); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_helper(Rd, Rs, tmp1, tmp2); for (int i = 0; i < 3; ++i) { - reverseb16(Rd, Rd, Rtmp1, Rtmp2); + revb_h_helper(Rd, Rd, tmp1, tmp2); } } -void MacroAssembler::grev32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in word (64bit) - // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev16(Rd, Rs, Rtmp1, Rtmp2); - reverseh32(Rd, Rd, Rtmp1, Rtmp2); - reverseh32(Rd, Rd, Rtmp1, Rtmp2); +// reverse bytes in each word +// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] +void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb(Rd, Rs, tmp1, tmp2); + ror_imm(Rd, Rd, 32); +} + +// reverse bytes in doubleword +// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] +void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + andi(tmp1, Rs, 0xFF); + slli(tmp1, tmp1, 8); + for (int step = 8; step < 56; step += 8) { + srli(tmp2, Rs, step); + andi(tmp2, tmp2, 0xFF); + orr(tmp1, tmp1, tmp2); + slli(tmp1, tmp1, 8); + } + srli(Rd, Rs, 56); + andi(Rd, Rd, 0xFF); + orr(Rd, tmp1, Rd); } -void MacroAssembler::grev(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) { - // Reverse bytes in double-word (64bit) - // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] - assert_different_registers(Rs, Rtmp1, Rtmp2); - assert_different_registers(Rd, Rtmp1, Rtmp2); - grev32(Rd, Rs, Rtmp1, Rtmp2); - slli(Rtmp2, Rd, 32); - srli(Rd, Rd, 32); - orr(Rd, Rd, Rtmp2); +// rotate right with shift bits +void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) +{ + if (UseRVB) { + rori(dst, src, shift); + return; + } + + assert_different_registers(dst, tmp); + assert_different_registers(src, tmp); + assert(shift < 64, "shift amount must be < 64"); + slli(tmp, src, 64 - shift); + srli(dst, src, shift); 
+ orr(dst, dst, tmp); } void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { @@ -1838,7 +1791,7 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R if (src.is_register()) { orr(tmp1, tmp1, src.as_register()); } else { - if(is_imm_in_range(src.as_constant(), 12, 0)) { + if (is_imm_in_range(src.as_constant(), 12, 0)) { ori(tmp1, tmp1, src.as_constant()); } else { assert_different_registers(tmp1, tmp2); @@ -1856,11 +1809,6 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, slli(tmp, tmp, Universe::narrow_klass_shift()); beq(trial_klass, tmp, L); return; - } else if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 - && Universe::narrow_klass_shift() == 0) { - // Only the bottom 32 bits matter - beq(trial_klass, tmp, L); - return; } decode_klass_not_null(tmp); } else { @@ -1869,10 +1817,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, beq(trial_klass, tmp, L); } -// Move an oop into a register. immediate is true if we want -// immediate instructions and nmethod entry barriers are not enabled. -// i.e. we are not going to patch this instruction while the code is being -// executed by another thread. +// Move an oop into a register. immediate is true if we want +// immediate instructions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { int oop_index; if (obj == NULL) { @@ -2077,16 +2025,16 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register assert_different_registers(src, xbase); li(xbase, (uintptr_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); assert_different_registers(t0, xbase); - slli(t0, src, LogKlassAlignmentInBytes); - add(dst, xbase, t0); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); } else { add(dst, xbase, src); } - if (xbase == xheapbase) { reinit_heapbase(); } + if (xbase == xheapbase) { reinit_heapbase(); } } void MacroAssembler::encode_klass_not_null(Register r) { @@ -2108,7 +2056,7 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && Universe::narrow_klass_shift() == 0) { - zero_ext(dst, src, 32); // clear upper 32 bits + zero_extend(dst, src, 32); return; } @@ -2160,8 +2108,7 @@ void MacroAssembler::decode_heap_oop(Register d, Register s) { Label done; mv(d, s); beqz(s, done); - slli(d, s, LogMinObjAlignmentInBytes); - add(d, xheapbase, d); + shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); bind(done); } verify_oop(d, "broken oop in decode_heap_oop"); @@ -2243,11 +2190,11 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass, RegisterOrConstant itable_index, Register method_result, - Register scan_temp, + Register scan_tmp, Label& L_no_such_interface, bool return_method) { - assert_different_registers(recv_klass, intf_klass, scan_temp); - assert_different_registers(method_result, intf_klass, scan_temp); + assert_different_registers(recv_klass, intf_klass, scan_tmp); + assert_different_registers(method_result, intf_klass, scan_tmp); assert(recv_klass != method_result || !return_method, "recv_klass can be destroyed when mehtid isn't 
needed"); assert(itable_index.is_constant() || itable_index.as_register() == method_result, @@ -2260,12 +2207,11 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, int vte_size = vtableEntry::size_in_bytes(); assert(vte_size == wordSize, "else adjust times_vte_scale"); - lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); + lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); // %%% Could store the aligned, prescaled offset in the klassoop. - slli(scan_temp, scan_temp, 3); - add(scan_temp, recv_klass, scan_temp); - add(scan_temp, scan_temp, vtable_base); + shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); + add(scan_tmp, scan_tmp, vtable_base); if (return_method) { // Adjust recv_klass by scaled itable_index, so we can free itable_index. @@ -2283,23 +2229,23 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, Label search, found_method; - ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); beq(intf_klass, method_result, found_method); bind(search); // Check that the previous entry is non-null. A null entry means that // the receiver class doens't implement the interface, and wasn't the // same as when the caller was compiled. beqz(method_result, L_no_such_interface, /* is_far */ true); - addi(scan_temp, scan_temp, scan_step); - ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + addi(scan_tmp, scan_tmp, scan_step); + ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); bne(intf_klass, method_result, search); bind(found_method); // Got a hit. if (return_method) { - lwu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); - add(method_result, recv_klass, scan_temp); + lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); + add(method_result, recv_klass, scan_tmp); ld(method_result, Address(method_result)); } } @@ -2314,8 +2260,7 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); if (vtable_index.is_register()) { - slli(method_result, vtable_index.as_register(), LogBytesPerWord); - add(method_result, recv_klass, method_result); + shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ld(method_result, Address(method_result, vtable_offset_in_bytes)); } else { vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; @@ -2324,8 +2269,6 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass, } void MacroAssembler::membar(uint32_t order_constraint) { - if (!os::is_MP()) { return; } - address prev = pc() - NativeMembar::instruction_size; address last = code()->last_insn(); @@ -2363,29 +2306,14 @@ Address MacroAssembler::form_address(Register Rd, Register base, long byte_offse void MacroAssembler::check_klass_subtype(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label& L_success) { Label L_failure; - check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); - check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); bind(L_failure); } -// Write serialization page 
so VM thread can do a pseudo remote membar. -// We use the current thread pointer to calculate a thread specific -// offset to write to within the page. This minimizes bus traffic -// due to cache line collision. -void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { - srli(tmp2, thread, os::get_serialize_page_shift_count()); - - int mask = os::vm_page_size() - sizeof(int); - andi(tmp2, tmp2, mask, tmp1); - - add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); - membar(MacroAssembler::AnyAny); - sw(zr, Address(tmp1)); -} - void MacroAssembler::safepoint_poll(Label& slow_path) { if (SafepointMechanism::uses_thread_local_poll()) { ld(t1, Address(xthread, Thread::polling_page_offset())); @@ -2400,30 +2328,6 @@ void MacroAssembler::safepoint_poll(Label& slow_path) { } } -// Just like safepoint_poll, but use an acquiring load for thread- -// local polling. -// -// We need an acquire here to ensure that any subsequent load of the -// global SafepointSynchronize::_state flag is ordered after this load -// of the local Thread::_polling page. We don't want this poll to -// return false (i.e. not safepointing) and a later poll of the global -// SafepointSynchronize::_state spuriously to return true. -// -// This is to avoid a race when we're in a native->Java transition -// racing the code which wakes up from a safepoint. -// -void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { - if (SafepointMechanism::uses_thread_local_poll()) { - membar(MacroAssembler::AnyAny); - ld(t1, Address(xthread, Thread::polling_page_offset())); - membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); - andi(t0, t1, SafepointMechanism::poll_bit()); - bnez(t0, slow_path); - } else { - safepoint_poll(slow_path); - } -} - void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) { // oldv holds comparison value @@ -2431,17 +2335,16 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg // addr identifies memory word to compare against/update Label retry_load, nope; bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect + // Load reserved from the memory location lr_d(tmp, addr, Assembler::aqrl); + // Fail and exit if it is not what we expect bne(tmp, oldv, nope); - // if we store+flush with no intervening write tmp wil be zero + // If the store conditional succeeds, tmp will be zero sc_d(tmp, newv, addr, Assembler::rl); beqz(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. 
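The reworded comments in cmpxchgptr() describe the canonical LR/SC loop: exit immediately when the loaded value differs from the expected one, and loop back only when the store conditional fails. A rough C++ model of that control flow, using GCC/Clang's __atomic_compare_exchange_n as a stand-in for lr.d/sc.d (a sketch of the shape, not the generated code):

#include <cstdint>

// Returns true when *addr held oldv and has been replaced by newv.
bool cmpxchgptr_model(std::intptr_t* addr, std::intptr_t oldv, std::intptr_t newv) {
  for (;;) {
    std::intptr_t observed = *addr;       // lr.d tmp, (addr)
    if (observed != oldv) {
      return false;                       // bne tmp, oldv, nope  (no retry on mismatch)
    }
    // sc.d tmp, newv, (addr): stores only while the reservation holds and
    // yields 0 on success; a weak compare-exchange can likewise fail spuriously.
    if (__atomic_compare_exchange_n(addr, &observed, newv,
                                    /*weak=*/true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) {
      return true;                        // beqz tmp, succeed
    }
    // store conditional failed: retry from the load (j retry_load)
  }
}

int main() {
  std::intptr_t cell = 40;
  return (cmpxchgptr_model(&cell, 40, 42) && cell == 42) ? 0 : 1;
}

Retrying only on a failed sc.d is what keeps the routine from reporting failure for a stale value: a mismatch is always observed on a freshly loaded word.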
+ // Retry only when the store conditional failed j(retry_load); - // if the memory word differs we return it in oldv and signal a fail + bind(nope); membar(AnyAny); mv(oldv, tmp); @@ -2468,7 +2371,7 @@ void MacroAssembler::load_reserved(Register addr, break; case uint32: lr_w(t0, addr, acquire); - clear_upper_bits(t0, 32); + zero_extend(t0, t0, 32); break; default: ShouldNotReachHere(); @@ -2509,8 +2412,9 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte if (size == int8) { addi(mask, zr, 0xff); } else { + // size == int16 case addi(mask, zr, -1); - zero_ext(mask, mask, registerSize - 16); + zero_extend(mask, mask, 16); } sll(mask, mask, shift); @@ -2563,9 +2467,10 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, srl(result, tmp, shift); if (size == int8) { - sign_ext(result, result, registerSize - 8); - } else if (size == int16) { - sign_ext(result, result, registerSize - 16); + sign_extend(result, result, 8); + } else { + // size == int16 case + sign_extend(result, result, 16); } } } @@ -2695,7 +2600,7 @@ ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) #define ATOMIC_XCHGU(OP1, OP2) \ void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ atomic_##OP2(prev, newv, addr); \ - clear_upper_bits(prev, 32); \ + zero_extend(prev, prev, 32); \ return; \ } @@ -2704,228 +2609,6 @@ ATOMIC_XCHGU(xchgalwu, xchgalw) #undef ATOMIC_XCHGU -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. 
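Unrelated to the biased-locking code being removed here: the cmpxchg_narrow_value_helper() change earlier in this file builds the lane mask as 0xff for int8, or as a zero-extended 0xffff for int16, shifted to the operand's position inside its aligned word. A sketch of how such a mask lets a word-sized LR/SC emulate a byte or halfword CAS (illustrative values and names only):

#include <cstdint>
#include <cstdio>

// Compare/replace only the masked lane of the containing 32-bit word; the
// real code derives 'shift' from the operand's low address bits and performs
// this read-modify-write under lr.w/sc.w.
std::uint32_t narrow_cas_word(std::uint32_t word, std::uint32_t expected,
                              std::uint32_t desired, int shift, std::uint32_t lane_mask) {
  if ((word & lane_mask) != ((expected << shift) & lane_mask)) {
    return word;                                       // lane mismatch: CAS fails
  }
  return (word & ~lane_mask) | ((desired << shift) & lane_mask);
}

int main() {
  std::uint32_t word = 0x11223344;                     // byte lane at bits 8..15 holds 0x33
  int shift = 8;
  std::uint32_t mask = 0xffu << shift;                 // int8: addi(mask, zr, 0xff); sll(mask, mask, shift)
  std::uint32_t updated = narrow_cas_word(word, 0x33, 0x99, shift, mask);
  std::printf("0x%08x -> 0x%08x\n", word, updated);    // 0x11223344 -> 0x11229944
  return 0;
}

After a successful exchange the routine shifts the lane back down and sign-extends it by 8 or 16 bits, which is the sign_extend() call that replaced the old sign_ext() in cmpxchg_narrow_value().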
- ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 - sub(temp_reg, temp_reg, (u1)markOopDesc::biased_lock_pattern); - if (flag->is_valid()) { mv(flag, temp_reg); } - beqz(temp_reg, done); -} - -void MacroAssembler::load_prototype_header(Register dst, Register src) { - load_klass(dst, src); - ld(dst, Address(dst, Klass::prototype_header_offset())); -} - -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters, - Register flag) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) { - counters = BiasedLocking::counters(); - } - - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - ld(swap_reg, mark_addr); - } - andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); - xori(t0, tmp_reg, (u1)markOopDesc::biased_lock_pattern); - bnez(t0, cas_label); // don't care flag unless jumping to done - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, tmp_reg, xthread); - xorr(tmp_reg, swap_reg, tmp_reg); - andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); - if (flag->is_valid()) { - mv(flag, tmp_reg); - } - - if (counters != NULL) { - Label around; - bnez(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); - j(done); - bind(around); - } else { - beqz(tmp_reg, done); - } - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); - bnez(t0, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. 
Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); - bnez(t0, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - { - Label cas_success; - Label counter; - li(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); - andr(swap_reg, swap_reg, t0); - orr(tmp_reg, swap_reg, xthread); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); - // cas failed here if slow_cass == NULL - if (flag->is_valid()) { - li(flag, 1); - j(counter); - } - - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - bind(cas_success); - if (flag->is_valid()) { - li(flag, 0); - bind(counter); - } - - if (counters != NULL) { - atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, t0); - } - } - j(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - { - Label cas_success; - Label counter; - load_prototype_header(tmp_reg, obj_reg); - orr(tmp_reg, xthread, tmp_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); - // cas failed here if slow_cass == NULL - if (flag->is_valid()) { - li(flag, 1); - j(counter); - } - - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - bind(cas_success); - if (flag->is_valid()) { - li(flag, 0); - bind(counter); - } - - if (counters != NULL) { - atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, t0); - } - } - j(done); - - // don't care flag unless jumping to done - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. 
Should attempt to preserve them. - { - Label cas_success, nope; - load_prototype_header(tmp_reg, obj_reg); - cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); - bind(cas_success); - - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - t0); - } - bind(nope); - } - - bind(cas_label); - - return null_check_offset; -} - -void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { - Label retry_load; - bind(retry_load); - // flush and load exclusive from the memory location - lr_w(tmp, counter_addr); - addw(tmp, tmp, 1); - // if we store+flush with no intervening write tmp wil be zero - sc_w(tmp, tmp, counter_addr); - bnez(tmp, retry_load); -} - void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { assert(ReservedCodeCacheSize < 4*G, "branch out of range"); assert(CodeCache::find_blob(entry.target()) != NULL, @@ -2962,15 +2645,15 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label* L_success, Label* L_failure, Label* L_slow_path, Register super_check_offset) { - assert_different_registers(sub_klass, super_klass, temp_reg); + assert_different_registers(sub_klass, super_klass, tmp_reg); bool must_load_sco = (super_check_offset == noreg); if (must_load_sco) { - assert(temp_reg != noreg, "supply either a temp or a register offset"); + assert(tmp_reg != noreg, "supply either a temp or a register offset"); } else { assert_different_registers(sub_klass, super_klass, super_check_offset); } @@ -3002,8 +2685,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // Check the supertype display: if (must_load_sco) { - lwu(temp_reg, super_check_offset_addr); - super_check_offset = temp_reg; + lwu(tmp_reg, super_check_offset_addr); + super_check_offset = tmp_reg; } add(t0, sub_klass, super_check_offset); Address super_check_addr(t0); @@ -3034,15 +2717,15 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, #undef final_jmp } -// scans count pointer sized words at [addr] for occurence of value, +// Scans count pointer sized words at [addr] for occurence of value, // generic void MacroAssembler::repne_scan(Register addr, Register value, Register count, - Register temp) { + Register tmp) { Label Lloop, Lexit; beqz(count, Lexit); bind(Lloop); - ld(temp, addr); - beq(value, temp, Lexit); + ld(tmp, addr); + beq(value, tmp, Lexit); add(addr, addr, wordSize); sub(count, count, 1); bnez(count, Lloop); @@ -3051,15 +2734,15 @@ void MacroAssembler::repne_scan(Register addr, Register value, Register count, void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Register super_klass, - Register temp_reg, - Register temp2_reg, + Register tmp1_reg, + Register tmp2_reg, Label* L_success, Label* L_failure) { - assert_different_registers(sub_klass, super_klass, temp_reg); - if (temp2_reg != noreg) { - assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, t0); + assert_different_registers(sub_klass, super_klass, tmp1_reg); + if (tmp2_reg != noreg) { + assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); } -#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == 
temp2_reg) +#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) Label L_fallthrough; int label_nulls = 0; @@ -3068,7 +2751,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, assert(label_nulls <= 1, "at most one NULL in the batch"); - // a couple of usefule fields in sub_klass: + // A couple of usefule fields in sub_klass: int ss_offset = in_bytes(Klass::secondary_supers_offset()); int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); Address secondary_supers_addr(sub_klass, ss_offset); @@ -3126,7 +2809,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, // pop will restore x10, so we should use a temp register to keep its value mv(t1, x10); - // Unspill the temp. registers: + // Unspill the temp registers: pop_reg(pushed_registers, sp); bne(t1, t0, *L_failure); @@ -3159,11 +2842,11 @@ void MacroAssembler::tlab_allocate(Register obj, void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, - Register tmp1, + Register tmp, Label& slow_case, bool is_far) { BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); + bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); } @@ -3188,7 +2871,8 @@ void MacroAssembler::get_thread(Register thread) { } void MacroAssembler::load_byte_map_base(Register reg) { - jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); li(reg, (uint64_t)byte_map_base); } @@ -3219,19 +2903,19 @@ void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &o } void MacroAssembler::build_frame(int framesize) { - assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + assert(framesize >= 2, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); sub(sp, sp, framesize); sd(fp, Address(sp, framesize - 2 * wordSize)); - sd(lr, Address(sp, framesize - wordSize)); - if (PreserveFramePointer) { add(fp, sp, framesize - 2 * wordSize); } + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } } void MacroAssembler::remove_frame(int framesize) { - assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + assert(framesize >= 2, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ld(fp, Address(sp, framesize - 2 * wordSize)); - ld(lr, Address(sp, framesize - wordSize)); + ld(ra, Address(sp, framesize - wordSize)); add(sp, sp, framesize); } @@ -3242,7 +2926,7 @@ void MacroAssembler::reserved_stack_check() { ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); bltu(sp, t0, no_reserved_zone_enabling); - enter(); // LR and FP are live. + enter(); // RA and FP are live. 
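// build_frame(framesize) above reserves the whole frame with a single
// "sub sp, sp, framesize" and stores the caller's fp and ra into the two
// top-most slots of the new frame; remove_frame reloads them from the same
// slots before popping. A sketch of the resulting layout, assuming RV64
// (wordSize == 8) and a 16-byte aligned framesize:
//
//   old sp ->  +--------------------+
//              |   saved ra         |   sp + framesize - 8
//              +--------------------+
//              |   saved fp         |   sp + framesize - 16
//              +--------------------+
//              |  ... frame body ...|
//   new sp ->  +--------------------+
//
// The helpers below only restate those two offsets; their names are the
// editor's, not part of the patch:
#include <cstddef>
constexpr std::size_t saved_fp_offset(std::size_t framesize) { return framesize - 16; }
constexpr std::size_t saved_ra_offset(std::size_t framesize) { return framesize - 8;  }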
mv(c_rarg0, xthread); int32_t offset = 0; la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); @@ -3260,52 +2944,267 @@ void MacroAssembler::reserved_stack_check() { bind(no_reserved_zone_enabling); } -// Move the address of the polling page into dest. -void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { - if (SafepointMechanism::uses_thread_local_poll()) { - ld(dest, Address(xthread, Thread::polling_page_offset())); - } else { - unsigned long align = (uintptr_t)page & 0xfff; - assert(align == 0, "polling page must be page aligned"); - la_patchable(dest, Address(page, rtype), offset); - } +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { + Label retry_load; + bind(retry_load); + // flush and load exclusive from the memory location + lr_w(tmp, counter_addr); + addw(tmp, tmp, 1); + // if we store+flush with no intervening write tmp wil be zero + sc_w(tmp, tmp, counter_addr); + bnez(tmp, retry_load); } -// Move the address of the polling page into dest. -address MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { - int32_t offset = 0; - get_polling_page(dest, page, offset, rtype); - return read_polling_page(dest, offset, rtype); +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); } -// Read the polling page. The address of the polling page must -// already be in r. -address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { - InstructionMark im(this); - code_section()->relocate(inst_mark(), rtype); - lwu(zr, Address(r, offset)); - return inst_mark(); -} +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters, + Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(lock_reg, obj_reg, swap_reg); -void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -#ifdef ASSERT - { - ThreadInVMfromUnknown tiv; - assert (UseCompressedOops, "should only be used for compressed oops"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } -#endif - int oop_index = oop_recorder()->find_index(obj); - InstructionMark im(this); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - code_section()->relocate(inst_mark(), rspec); - li32(dst, 0xDEADBEEF); - clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend. 
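// atomic_incw above is the canonical LR/SC retry loop: lr.w takes a
// reservation on the word, addw bumps the value, sc.w writes it back and
// leaves zero in its destination only if the reservation still held, and
// bnez loops on failure. The same control flow expressed as a CAS loop, as
// a rough host-level analogue only (the generated code relies on the
// lr.w/sc.w reservation, not on compare-and-swap):
#include <atomic>
#include <cstdint>

static void atomic_incw_model(std::atomic<int32_t>& counter) {
  int32_t old = counter.load(std::memory_order_relaxed);
  // compare_exchange_weak returning false plays the role of "sc.w failed"
  while (!counter.compare_exchange_weak(old, old + 1,
                                        std::memory_order_relaxed)) {
    // `old` has been reloaded; retry, like branching back to retry_load
  }
}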
-} + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); -void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld(swap_reg, mark_addr); + } + andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); + li(t0, markOopDesc::biased_lock_pattern); + bne(t0, tmp_reg, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, tmp_reg, xthread); + xorr(tmp_reg, swap_reg, tmp_reg); + andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); + if (flag->is_valid()) { + mv(flag, tmp_reg); + } + if (counters != NULL) { + Label around; + bnez(tmp_reg, around); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); + j(done); + bind(around); + } else { + beqz(tmp_reg, done); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); + bnez(t0, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); + bnez(t0, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. 
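// The sequence above first tests the three low mark-word bits against the
// biased-lock pattern and then XORs the mark with (prototype header | current
// thread), clearing the age bits: a zero result means the object is still
// biased to this thread in the current epoch. A scalar model of those two
// tests; the constants mirror the usual markOopDesc encoding (lock: 2 bits,
// biased_lock: 1 bit, age: 4 bits) and should be read as the editor's
// assumptions rather than authoritative values:
#include <cstdint>

constexpr uintptr_t biased_lock_mask    = 0x7;                  // low three bits
constexpr uintptr_t biased_lock_pattern = 0x5;                  // biased + unlocked
constexpr uintptr_t age_mask_in_place   = uintptr_t(0xf) << 3;  // 4 age bits

inline bool has_bias_pattern(uintptr_t mark) {
  return (mark & biased_lock_mask) == biased_lock_pattern;
}

// zero => still biased to `thread` with a current epoch; non-zero bits tell
// the caller whether to rebias, revoke, or fall back to the CAS-based lock
inline uintptr_t bias_delta(uintptr_t mark, uintptr_t prototype, uintptr_t thread) {
  return (mark ^ (prototype | thread)) & ~age_mask_in_place;
}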
+ { + Label cas_success; + Label counter; + mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, t0); + orr(tmp_reg, swap_reg, xthread); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + mv(flag, 1); + j(counter); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + bind(cas_success); + if (flag->is_valid()) { + mv(flag, 0); + bind(counter); + } + if (counters != NULL) { + atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label cas_success; + Label counter; + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, xthread, tmp_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + mv(flag, 1); + j(counter); + } + + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + bind(cas_success); + if (flag->is_valid()) { + mv(flag, 0); + bind(counter); + } + if (counters != NULL) { + atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label cas_success, nope; + load_prototype_header(tmp_reg, obj_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); + bind(cas_success); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. 
+ if (counters != NULL) { + atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, + t0); + } + bind(nope); + } + + bind(cas_label); + + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); + if (flag->is_valid()) { mv(flag, tmp_reg); } + beqz(tmp_reg, done); +} + +// Move the address of the polling page into dest. +void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(dest, Address(xthread, Thread::polling_page_offset())); + } else { + uint64_t align = (uint64_t)page & 0xfff; + assert(align == 0, "polling page must be page aligned"); + la_patchable(dest, Address(page, rtype), offset); + } +} + +// Read the polling page. The address of the polling page must +// already be in r. +void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { + int32_t offset = 0; + get_polling_page(dest, page, offset, rtype); + read_polling_page(dest, offset, rtype); +} + +// Read the polling page. The address of the polling page must +// already be in r. +void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { + code_section()->relocate(pc(), rtype); + lwu(zr, Address(dest, offset)); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert (UseCompressedOops, "should only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); + InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { assert (UseCompressedClassPointers, "should only be used for compressed headers"); assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int index = oop_recorder()->find_index(k); @@ -3316,7 +3215,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { code_section()->relocate(inst_mark(), rspec); narrowKlass nk = Klass::encode_klass(k); li32(dst, nk); - clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend. + zero_extend(dst, dst, 32); } // Maybe emit a call via a trampoline. 
If the code cache is small @@ -3376,7 +3275,7 @@ address MacroAssembler::ic_call(address entry, jint method_index) { // // Related trampoline stub for this call site in the stub section: // load the call target from the constant pool -// branch (LR still points to the call site above) +// branch (RA still points to the call site above) address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, address dest) { @@ -3392,7 +3291,8 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // make sure 4 byte aligned here, so that the destination address would be // 8 byte aligned after 3 intructions - while (offset() % wordSize == 0) { nop(); } + // when we reach here we may get a 2-byte alignment so need to align it + align(wordSize, NativeCallTrampolineStub::data_offset); relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); @@ -3405,7 +3305,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ld(t0, target); // auipc + ld jr(t0); // jalr bind(target); - assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64"); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + assert(offset() % wordSize == 0, "bad alignment"); emit_int64((intptr_t)dest); const address stub_start_addr = addr_at(stub_start_offset); @@ -3452,513 +3354,615 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { beq(src1, t0, equal); } -void MacroAssembler::load_method_holder(Register holder, Register method) { - ld(holder, Address(method, Method::const_offset())); // ConstMethod* - ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* - ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* -} - -void MacroAssembler::oop_beq(Register obj1, Register obj2, Label& L_equal, bool is_far) { - beq(obj1, obj2, L_equal, is_far); +// string indexof +// compute index by trailing zeros +void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, + Register match_mask, Register result, + Register ch2, Register tmp, + bool haystack_isL) +{ + int haystack_chr_shift = haystack_isL ? 
0 : 1; + srl(match_mask, match_mask, trailing_zeros); + srli(match_mask, match_mask, 1); + srli(tmp, trailing_zeros, LogBitsPerByte); + if (!haystack_isL) andi(tmp, tmp, 0xE); + add(haystack, haystack, tmp); + ld(ch2, Address(haystack)); + if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); + add(result, result, tmp); } -void MacroAssembler::oop_bne(Register obj1, Register obj2, Label& L_nequal, bool is_far) { - bne(obj1, obj2, L_nequal, is_far); +// string indexof +// Find pattern element in src, compute match mask, +// only the first occurrence of 0x80/0x8000 at low bits is the valid match index +// match mask patterns and corresponding indices would be like: +// - 0x8080808080808080 (Latin1) +// - 7 6 5 4 3 2 1 0 (match index) +// - 0x8000800080008000 (UTF16) +// - 3 2 1 0 (match index) +void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2) +{ + xorr(src, pattern, src); + sub(match_mask, src, mask1); + orr(src, src, mask2); + notr(src, src); + andr(match_mask, match_mask, src); } #ifdef COMPILER2 -// TODO: wind: THIS FUNCTION IS TOTALLY DIFFERENT FROM JDK11 -// generate_large_array_equals() -//  git difftool ~/jvm/dragonwell11/src/hotspot/cpu/riscv64/stubGenerator_riscv64.cpp ~/jvm/dragonwell11-latest/src/hotspot/cpu/riscv64/stubGenerator_riscv64.cpp -void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, - Register tmp4, Register tmp5, Register tmp6, Register result, - Register cnt1, int elem_size) { - Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; - Register tmp1 = t0; - Register tmp2 = t1; - Register cnt2 = tmp2; // cnt2 only used in array length compare - Register elem_per_word = tmp6; - int log_elem_size = exact_log2(elem_size); - int length_offset = arrayOopDesc::length_offset_in_bytes(); - int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); +// Code for BigInteger::mulAdd instrinsic +// out = x10 +// in = x11 +// offset = x12 (already out.length-offset) +// len = x13 +// k = x14 +// tmp = x28 +// +// pseudo code from java implementation: +// long kLong = k & LONG_MASK; +// carry = 0; +// offset = out.length-offset - 1; +// for (int j = len - 1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp) { + Label L_tail_loop, L_unroll, L_end; + mv(tmp, out); + mv(out, zr); + blez(len, L_end); + zero_extend(k, k, 32); + slliw(t0, offset, LogBytesPerInt); + add(offset, tmp, t0); + slliw(t0, len, LogBytesPerInt); + add(in, in, t0); + + const int unroll = 8; + li(tmp, unroll); + blt(len, tmp, L_tail_loop); + bind(L_unroll); + for (int i = 0; i < unroll; i++) { + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + } + subw(len, len, tmp); + bge(len, tmp, L_unroll); + + bind(L_tail_loop); + blez(len, L_end); + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + subw(len, len, 1); + j(L_tail_loop); + + bind(L_end); +} + +// add two unsigned input and output carry +void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + assert_different_registers(dst, src2); + add(dst, src1, src2); + sltu(carry, dst, src2); +} - assert(elem_size == 1 || elem_size == 2, "must be char or byte"); - assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); - li(elem_per_word, wordSize / elem_size); +// add two input with carry +void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + add(dst, src1, src2); + add(dst, dst, carry); +} - BLOCK_COMMENT("arrays_equals {"); +// add two unsigned input with carry and output carry +void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, src2); + adc(dst, src1, src2, carry); + sltu(carry, dst, src2); +} - // if (a1 == a2), return true - oop_beq(a1, a2, SAME); +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry) +{ + cad(dest_lo, dest_lo, src1, carry); + add(dest_hi, dest_hi, carry); + cad(dest_lo, dest_lo, src2, carry); + add(final_dest_hi, dest_hi, carry); +} - mv(result, false); - beqz(a1, DONE); - beqz(a2, DONE); - lwu(cnt1, Address(a1, length_offset)); - lwu(cnt2, Address(a2, length_offset)); - bne(cnt2, cnt1, DONE); - beqz(cnt1, SAME); +/** + * Multiply 32 bit by 32 bit first loop. 
+ */ +void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = y[idx] * x[xstart] + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + + Label L_first_loop, L_first_loop_exit; + blez(idx, L_first_loop_exit); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(x_xstart, Address(t0, 0)); + + bind(L_first_loop); + subw(idx, idx, 1); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(y_idx, Address(t0, 0)); + mul(product, x_xstart, y_idx); + add(product, product, carry); + srli(carry, product, 32); + subw(kdx, kdx, 1); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(product, Address(t0, 0)); + bgtz(idx, L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // - slli(tmp5, cnt1, 3 + log_elem_size); - sub(tmp5, zr, tmp5); - add(a1, a1, base_offset); - add(a2, a2, base_offset); - ld(tmp3, Address(a1, 0)); - ld(tmp4, Address(a2, 0)); - ble(cnt1, elem_per_word, SHORT); // short or same + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; - // Main 16 byte comparison loop with 2 exits - bind(NEXT_DWORD); { - ld(tmp1, Address(a1, wordSize)); - ld(tmp2, Address(a2, wordSize)); - sub(cnt1, cnt1, 2 * wordSize / elem_size); - blez(cnt1, TAIL); - bne(tmp3, tmp4, DONE); - ld(tmp3, Address(a1, 2 * wordSize)); - ld(tmp4, Address(a2, 2 * wordSize)); - add(a1, a1, 2 * wordSize); - add(a2, a2, 2 * wordSize); - ble(cnt1, elem_per_word, TAIL2); - } beq(tmp1, tmp2, NEXT_DWORD); - j(DONE); + subw(xstart, xstart, 1); + bltz(xstart, L_one_x); - bind(TAIL); - xorr(tmp4, tmp3, tmp4); - xorr(tmp2, tmp1, tmp2); - sll(tmp2, tmp2, tmp5); - orr(tmp5, tmp4, tmp2); - j(IS_TMP5_ZR); + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(x_xstart, Address(t0, 0)); + ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian - bind(TAIL2); - bne(tmp1, tmp2, DONE); + bind(L_first_loop); + subw(idx, idx, 1); + bltz(idx, L_first_loop_exit); + subw(idx, idx, 1); + bltz(idx, L_one_y); - bind(SHORT); - xorr(tmp4, tmp3, tmp4); - sll(tmp5, tmp4, tmp5); + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(y_idx, Address(t0, 0)); + ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); - bind(IS_TMP5_ZR); - bnez(tmp5, DONE); + mulhu(t0, x_xstart, y_idx); + mul(product, x_xstart, y_idx); + cad(product, product, carry, t1); + adc(carry, t0, zr, t1); - bind(SAME); - mv(result, true); - // That's it. 
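// The Java-style pseudo code above (multiply_32_x_32_loop) is the first pass
// of the schoolbook multiply: each 32-bit limb of y is multiplied by a single
// limb of x, the low 32 bits are stored into z and the high 32 bits become
// the carry for the next iteration. A plain C++ rendering of that loop, using
// BigInteger's convention that index 0 holds the most significant limb (a
// sketch with the editor's names, not the generated code, which also has to
// swap the big-endian int pairs in memory):
#include <cstdint>
#include <cstddef>

// z must have room for ylen + 1 limbs
static void mul_limb_first_loop(const uint32_t* y, std::size_t ylen,
                                uint32_t x_limb, uint32_t* z) {
  uint64_t carry = 0;
  for (std::size_t idx = ylen; idx-- > 0; ) {     // least significant limb first
    uint64_t product = uint64_t(y[idx]) * x_limb + carry;
    z[idx + 1] = uint32_t(product);               // low half, like z[kdx] = (int)product
    carry      = product >> 32;                   // high half carried on
  }
  z[0] = uint32_t(carry);                         // like z[xstart] = (int)carry
}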
- bind(DONE); + subw(kdx, kdx, 2); + ror_imm(product, product, 32); // back to big-endian + shadd(t0, kdx, z, t0, LogBytesPerInt); + sd(product, Address(t0, 0)); - BLOCK_COMMENT("} array_equals"); -} + j(L_first_loop); -// Compare Strings + bind(L_one_y); + lwu(y_idx, Address(y, 0)); + j(L_multiply); -// For Strings we're passed the address of the first characters in a1 -// and a2 and the length in cnt1. -// elem_size is the element size in bytes: either 1 or 2. -// All comparisons (including the final one, which may overlap) are -// performed 8 bytes at a time. + bind(L_one_x); + lwu(x_xstart, Address(x, 0)); + j(L_first_loop); -void MacroAssembler::string_equals(Register a1, Register a2, - Register result, Register cnt1, int elem_size) + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128 bit. Unrolled inner loop. + * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) { - Label SAME, DONE, SHORT, NEXT_WORD; - Register tmp1 = t0; - Register tmp2 = t1; + // jlong carry, x[], y[], z[]; + // int kdx = xstart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // - assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); - assert_different_registers(a1, a2, result, cnt1, t0, t1); + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; - BLOCK_COMMENT("string_equals {"); + srliw(jdx, idx, 2); - beqz(cnt1, SAME); - mv(result, false); + bind(L_third_loop); - // Check for short strings, i.e. smaller than wordSize. - sub(cnt1, cnt1, wordSize); - blez(cnt1, SHORT); + subw(jdx, jdx, 1); + bltz(jdx, L_third_loop_exit); + subw(idx, idx, 4); - // Main 8 byte comparison loop. - bind(NEXT_WORD); { - ld(tmp1, Address(a1, 0)); - add(a1, a1, wordSize); - ld(tmp2, Address(a2, 0)); - add(a2, a2, wordSize); - sub(cnt1, cnt1, wordSize); - bne(tmp1, tmp2, DONE); - } bgtz(cnt1, NEXT_WORD); + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ld(yz_idx1, Address(t0, wordSize)); - if (!AvoidUnalignedAccesses) { - // Last longword. In the case where length == 4 we compare the - // same longword twice, but that's still faster than another - // conditional branch. - // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when - // length == 4. - add(tmp1, a1, cnt1); - ld(tmp1, Address(tmp1, 0)); - add(tmp2, a2, cnt1); - ld(tmp2, Address(tmp2, 0)); - bne(tmp1, tmp2, DONE); - j(SAME); - } + shadd(tmp6, idx, z, t0, LogBytesPerInt); - bind(SHORT); - ld(tmp1, Address(a1)); - ld(tmp2, Address(a2)); - xorr(tmp1, tmp1, tmp2); - neg(cnt1, cnt1); - slli(cnt1, cnt1, LogBitsPerByte); - sll(tmp1, tmp1, cnt1); - bnez(tmp1, DONE); + ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror_imm(yz_idx2, yz_idx2, 32); - // Arrays are equal. - bind(SAME); - mv(result, true); + ld(t1, Address(tmp6, 0)); + ld(t0, Address(tmp6, wordSize)); - // That's it. 
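// The cad/adc/cadc helpers defined earlier implement add-with-carry without a
// flags register: after an unsigned add, sltu against one of the operands is 1
// exactly when the addition wrapped, which recovers the carry-out. The same
// trick in plain C++ (a sketch; cadc simply chains the two steps below):
#include <cstdint>

// dst = a + b, carry_out = 1 if the add wrapped (what cad() computes with sltu)
static inline uint64_t cad_model(uint64_t a, uint64_t b, uint64_t& carry_out) {
  uint64_t dst = a + b;
  carry_out = (dst < b) ? 1 : 0;   // wrapped iff the sum is now below an operand
  return dst;
}

// dst = a + b + carry_in, no carry-out (what adc() computes)
static inline uint64_t adc_model(uint64_t a, uint64_t b, uint64_t carry_in) {
  return a + b + carry_in;
}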
- bind(DONE); - BLOCK_COMMENT("} string_equals"); -} + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); -typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian + ror_imm(t1, t1, 32, tmp); -// Compare strings. -void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, - Register tmp3, int ae) -{ - Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, - DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, - SHORT_LOOP_START, TAIL_CHECK, L; + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + mulhu(carry2, product_hi, yz_idx2); - const int STUB_THRESHOLD = 64 + 8; - bool isLL = ae == StrIntrinsicNode::LL; - bool isLU = ae == StrIntrinsicNode::LU; - bool isUL = ae == StrIntrinsicNode::UL; + cad(tmp3, tmp3, carry, carry); + adc(tmp4, tmp4, zr, carry); + cad(tmp3, tmp3, t0, t0); + cadc(tmp4, tmp4, tmp, t0); + adc(carry, carry2, zr, t0); + cad(tmp4, tmp4, t1, carry2); + adc(carry, carry, zr, carry2); - bool str1_isL = isLL || isLU; - bool str2_isL = isLL || isUL; + ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian + ror_imm(tmp4, tmp4, 32); + sd(tmp4, Address(tmp6, 0)); + sd(tmp3, Address(tmp6, wordSize)); - // for L strings, 1 byte for 1 character - // for U strings, 2 bytes for 1 character - int str1_chr_size = str1_isL ? 1 : 2; - int str2_chr_size = str2_isL ? 1 : 2; - int minCharsInWord = isLL ? wordSize : wordSize / 2; + j(L_third_loop); - load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; - load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + bind(L_third_loop_exit); - BLOCK_COMMENT("string_compare {"); + andi(idx, idx, 0x3); + beqz(idx, L_post_third_loop_done); - // Bizzarely, the counts are passed in bytes, regardless of whether they - // are L or U strings, however the result is always in characters. - if (!str1_isL) { - sraiw(cnt1, cnt1, 1); - } - if (!str2_isL) { - sraiw(cnt2, cnt2, 1); + Label L_check_1; + subw(idx, idx, 2); + bltz(idx, L_check_1); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx1, Address(t0, 0)); + ror_imm(yz_idx1, yz_idx1, 32); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + shadd(t0, idx, z, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ror_imm(yz_idx2, yz_idx2, 32, tmp); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + + ror_imm(tmp3, tmp3, 32, tmp); + sd(tmp3, Address(t0, 0)); + + bind(L_check_1); + + andi(idx, idx, 0x1); + subw(idx, idx, 1); + bltz(idx, L_post_third_loop_done); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + mulhu(carry2, tmp4, product_hi); + + shadd(t0, idx, z, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + + add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + + shadd(t0, idx, z, t0, LogBytesPerInt); + sw(tmp3, Address(t0, 0)); + + slli(t0, carry2, 32); + srli(carry, tmp3, 32); + orr(carry, carry, t0); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic. 
+ * + * x10: x + * x11: xlen + * x12: y + * x13: ylen + * x14: z + * x15: zlen + * x16: tmp1 + * x17: tmp2 + * x7: tmp3 + * x28: tmp4 + * x29: tmp5 + * x30: tmp6 + * x31: tmp7 + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) +{ + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + mv(idx, ylen); // idx = ylen; + mv(kdx, zlen); // kdx = xlen+ylen; + mv(carry, zr); // carry = 0; + + Label L_multiply_64_x_64_loop, L_done; + + subw(xstart, xlen, 1); + bltz(xstart, L_done); + + const Register jdx = tmp1; + + if (AvoidUnalignedAccesses) { + // Check if x and y are both 8-byte aligned. + orr(t0, xlen, ylen); + andi(t0, t0, 0x1); + beqz(t0, L_multiply_64_x_64_loop); + + multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + Label L_second_loop_unaligned; + bind(L_second_loop_unaligned); + mv(carry, zr); + mv(jdx, ylen); + subw(xstart, xstart, 1); + bltz(xstart, L_done); + sub(sp, sp, 2 * wordSize); + sd(z, Address(sp, 0)); + sd(zr, Address(sp, wordSize)); + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(product, Address(t0, 0)); + Label L_third_loop, L_third_loop_exit; + + blez(jdx, L_third_loop_exit); + + bind(L_third_loop); + subw(jdx, jdx, 1); + shadd(t0, jdx, y, t0, LogBytesPerInt); + lwu(t0, Address(t0, 0)); + mul(t1, t0, product); + add(t0, t1, carry); + shadd(tmp6, jdx, z, t1, LogBytesPerInt); + lwu(t1, Address(tmp6, 0)); + add(t0, t0, t1); + sw(t0, Address(tmp6, 0)); + srli(carry, t0, 32); + bgtz(jdx, L_third_loop); + + bind(L_third_loop_exit); + ld(z, Address(sp, 0)); + addi(sp, sp, 2 * wordSize); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + j(L_second_loop_unaligned); + } + + bind(L_multiply_64_x_64_loop); + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop_aligned; + beqz(kdx, L_second_loop_aligned); + + Label L_carry; + subw(kdx, kdx, 1); + beqz(kdx, L_carry); + + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + srli(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + // Second and third (nested) loops. 
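// multiply_64_x_64_loop above forms each 128-bit partial product with a
// mul/mulhu pair and folds the running carry in with cad/adc. One step of
// that, written with the GCC/Clang 128-bit integer extension purely as an
// illustration of what the two instructions produce together:
#include <cstdint>

static inline void mul64_step_model(uint64_t x, uint64_t y,
                                    uint64_t& carry, uint64_t& out_lo) {
  unsigned __int128 p = (unsigned __int128)x * y + carry;
  out_lo = (uint64_t)p;          // low half: mul, plus cad for the carry-in
  carry  = (uint64_t)(p >> 64);  // high half: mulhu, plus adc for the carry-out
}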
+ // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + bind(L_second_loop_aligned); + mv(carry, zr); // carry = 0; + mv(jdx, ylen); // j = ystart+1 + + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_done); + + sub(sp, sp, 4 * wordSize); + sd(z, Address(sp, 0)); + + Label L_last_x; + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_last_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(product_hi, Address(t0, 0)); + ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + sd(ylen, Address(sp, wordSize)); + sd(x, Address(sp, 2 * wordSize)); + sd(xstart, Address(sp, 3 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ld(z, Address(sp, 0)); + ld(ylen, Address(sp, wordSize)); + ld(x, Address(sp, 2 * wordSize)); + ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen + addi(sp, sp, 4 * wordSize); + + addiw(tmp3, xlen, 1); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + subw(tmp3, tmp3, 1); + bltz(tmp3, L_done); + + srli(carry, carry, 32); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + j(L_second_loop_aligned); + + // Next infrequent code is moved outside loops. + bind(L_last_x); + lwu(product_hi, Address(x, 0)); + j(L_third_loop_prologue); + + bind(L_done); +} +#endif + +// Count bits of trailing zero chars from lsb to msb until first non-zero element. +// For LL case, one byte for one element, so shift 8 bits once, and for other case, +// shift 16 bits once. +void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) +{ + if (UseRVB) { + assert_different_registers(Rd, Rs, tmp1); + int step = isLL ? 8 : 16; + ctz(Rd, Rs); + andi(tmp1, Rd, step - 1); + sub(Rd, Rd, tmp1); + return; } + assert_different_registers(Rd, Rs, tmp1, tmp2); + Label Loop; + int step = isLL ? 8 : 16; + li(Rd, -step); + mv(tmp2, Rs); - // Compute the minimum of the string lengths and save the difference in result. 
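// compute_match_mask (added earlier in this file) uses the classic SWAR
// zero-byte test: after XORing a word of the haystack with the replicated
// pattern character, (v - 0x0101...) & ~(v | 0x7f7f...) leaves 0x80 only in
// bytes that matched, and only the lowest such 0x80 is a valid match index,
// exactly as its comment says; ctzc_bit/compute_index then turn that bit
// position into an element index. The Latin-1 flavour in plain C++ (a sketch
// using the GCC/Clang __builtin_ctzll intrinsic; the names are the editor's):
#include <cstdint>

// byte index of the first byte of `word` equal to `ch`, or -1
static int first_match_byte(uint64_t word, uint8_t ch) {
  const uint64_t mask1 = 0x0101010101010101ull;     // lsb of every byte
  const uint64_t mask2 = 0x7f7f7f7f7f7f7f7full;     // all bits but each msb
  uint64_t pattern = mask1 * ch;                    // ch replicated to 8 bytes
  uint64_t v = word ^ pattern;                      // matching bytes become 0x00
  uint64_t match_mask = (v - mask1) & ~(v | mask2); // 0x80 in matching bytes
  if (match_mask == 0) return -1;
  return __builtin_ctzll(match_mask) >> 3;          // trailing zero bits / 8
}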
- sub(result, cnt1, cnt2); - bgt(cnt1, cnt2, L); - mv(cnt2, cnt1); - bind(L); + bind(Loop); + addi(Rd, Rd, step); + andi(tmp1, tmp2, ((1 << step) - 1)); + srli(tmp2, tmp2, step); + beqz(tmp1, Loop); +} - // A very short string - li(t0, minCharsInWord); - ble(cnt2, t0, SHORT_STRING); +// This instruction reads adjacent 4 bytes from the lower half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A300A200A100A0 +void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + if (i) { + slli(tmp2, tmp2, i * 8); + } + orr(Rd, Rd, tmp2); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} - // Compare longwords - // load first parts of strings and finish initialization while loading +// This instruction reads adjacent 4 bytes from the upper half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A700A600A500A4 +void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF00000000); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) { - if (str1_isL == str2_isL) { // LL or UU - // load 8 bytes once to compare - ld(tmp1, Address(str1)); - beq(str1, str2, DONE); - ld(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - sub(cnt2, cnt2, minCharsInWord); - beqz(cnt2, TAIL_CHECK); - // convert cnt2 from characters to bytes - if(!str1_isL) { - slli(cnt2, cnt2, 1); - } - add(str2, str2, cnt2); - add(str1, str1, cnt2); - sub(cnt2, zr, cnt2); - } else if (isLU) { // LU case - lwu(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - addi(cnt2, cnt2, -4); - add(str1, str1, cnt2); - sub(cnt1, zr, cnt2); - slli(cnt2, cnt2, 1); - add(str2, str2, cnt2); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - sub(cnt2, zr, cnt2); - addi(cnt1, cnt1, 4); - } else { // UL case - ld(tmp1, Address(str1)); - lwu(tmp2, Address(str2)); - li(t0, STUB_THRESHOLD); - bge(cnt2, t0, STUB); - addi(cnt2, cnt2, -4); - slli(t0, cnt2, 1); - sub(cnt1, zr, t0); - add(str1, str1, t0); - add(str2, str2, cnt2); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - sub(cnt2, zr, cnt2); - addi(cnt1, cnt1, 8); + andr(tmp2, Rs, tmp1); + orr(Rd, Rd, tmp2); + srli(Rd, Rd, 8); + if (i != 3) { + slli(tmp1, tmp1, 8); } - addi(cnt2, cnt2, isUL ? 4 : 8); - bgez(cnt2, TAIL); - xorr(tmp3, tmp1, tmp2); - bnez(tmp3, DIFFERENCE); - - // main loop - bind(NEXT_WORD); - if (str1_isL == str2_isL) { // LL or UU - add(t0, str1, cnt2); - ld(tmp1, Address(t0)); - add(t0, str2, cnt2); - ld(tmp2, Address(t0)); - addi(cnt2, cnt2, 8); - } else if (isLU) { // LU case - add(t0, str1, cnt1); - lwu(tmp1, Address(t0)); - add(t0, str2, cnt2); - ld(tmp2, Address(t0)); - addi(cnt1, cnt1, 4); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - addi(cnt2, cnt2, 8); - } else { // UL case - add(t0, str2, cnt2); - lwu(tmp2, Address(t0)); - add(t0, str1, cnt1); - ld(tmp1, Address(t0)); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - addi(cnt1, cnt1, 8); - addi(cnt2, cnt2, 4); - } - bgez(cnt2, TAIL); - - xorr(tmp3, tmp1, tmp2); - beqz(tmp3, NEXT_WORD); - j(DIFFERENCE); - bind(TAIL); - xorr(tmp3, tmp1, tmp2); - bnez(tmp3, DIFFERENCE); - // Last longword. 
In the case where length == 4 we compare the - // same longword twice, but that's still faster than another - // conditional branch. - if (str1_isL == str2_isL) { // LL or UU - ld(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - } else if (isLU) { // LU case - lwu(tmp1, Address(str1)); - ld(tmp2, Address(str2)); - inflate_lo32(tmp3, tmp1); - mv(tmp1, tmp3); - } else { // UL case - lwu(tmp2, Address(str2)); - ld(tmp1, Address(str1)); - inflate_lo32(tmp3, tmp2); - mv(tmp2, tmp3); - } - bind(TAIL_CHECK); - xorr(tmp3, tmp1, tmp2); - beqz(tmp3, DONE); - - // Find the first different characters in the longwords and - // compute their difference. - bind(DIFFERENCE); - ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb - srl(tmp1, tmp1, result); - srl(tmp2, tmp2, result); - if (isLL) { - andi(tmp1, tmp1, 0xFF); - andi(tmp2, tmp2, 0xFF); - } else { - andi(tmp1, tmp1, 0xFFFF); - andi(tmp2, tmp2, 0xFFFF); - } - sub(result, tmp1, tmp2); - j(DONE); - } - - bind(STUB); - RuntimeAddress stub = NULL; - switch (ae) { - case StrIntrinsicNode::LL: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_LL()); - break; - case StrIntrinsicNode::UU: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_UU()); - break; - case StrIntrinsicNode::LU: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_LU()); - break; - case StrIntrinsicNode::UL: - stub = RuntimeAddress(StubRoutines::riscv64::compare_long_string_UL()); - break; - default: - ShouldNotReachHere(); - } - assert(stub.target() != NULL, "compare_long_string stub has not been generated"); - trampoline_call(stub); - j(DONE); - - bind(SHORT_STRING); - // Is the minimum length zero? - beqz(cnt2, DONE); - // arrange code to do most branches while loading and loading next characters - // while comparing previous - (this->*str1_load_chr)(tmp1, Address(str1), t0); - addi(str1, str1, str1_chr_size); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST_INIT); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - j(SHORT_LOOP_START); - bind(SHORT_LOOP); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST); - bind(SHORT_LOOP_START); - (this->*str1_load_chr)(tmp2, Address(str1), t0); - addi(str1, str1, str1_chr_size); - (this->*str2_load_chr)(t0, Address(str2), t0); - addi(str2, str2, str2_chr_size); - bne(tmp1, cnt1, SHORT_LOOP_TAIL); - addi(cnt2, cnt2, -1); - beqz(cnt2, SHORT_LAST2); - (this->*str1_load_chr)(tmp1, Address(str1), t0); - addi(str1, str1, str1_chr_size); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - beq(tmp2, t0, SHORT_LOOP); - sub(result, tmp2, t0); - j(DONE); - bind(SHORT_LOOP_TAIL); - sub(result, tmp1, cnt1); - j(DONE); - bind(SHORT_LAST2); - beq(tmp2, t0, DONE); - sub(result, tmp2, t0); - - j(DONE); - bind(SHORT_LAST_INIT); - (this->*str2_load_chr)(cnt1, Address(str2), t0); - addi(str2, str2, str2_chr_size); - bind(SHORT_LAST); - beq(tmp1, cnt1, DONE); - sub(result, tmp1, cnt1); - - bind(DONE); - - BLOCK_COMMENT("} string_compare"); -} -#endif // COMPILER2 - -// string indexof -// compute index by trailing zeros -void MacroAssembler::compute_index(Register haystack, Register trailing_zero, - Register match_mask, Register result, - Register ch2, Register tmp, - bool haystack_isL) -{ - int haystack_chr_shift = haystack_isL ? 
0 : 1; - srl(match_mask, match_mask, trailing_zero); - srli(match_mask, match_mask, 1); - srli(tmp, trailing_zero, LogBitsPerByte); - if (!haystack_isL) andi(tmp, tmp, 0xE); - add(haystack, haystack, tmp); - ld(ch2, Address(haystack)); - if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); - add(result, result, tmp); -} - -// string indexof -// find pattern element in src, compute match mask, -// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -// match mask patterns and corresponding indices would be like: -// - 0x8080808080808080 (Latin1) -// - 7 6 5 4 3 2 1 0 (match index) -// - 0x8000800080008000 (UTF16) -// - 3 2 1 0 (match index) -void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, - Register mask1, Register mask2) -{ - xorr(src, pattern, src); - sub(match_mask, src, mask1); - orr(src, src, mask2); - notr(src, src); - andr(match_mask, match_mask, src); -} - -// count bits of trailing zero chars from lsb to msb until first non-zero element. -// For LL case, one byte for one element, so shift 8 bits once, and for other case, -// shift 16 bits once. -void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - Label Loop; - int step = isLL ? 8 : 16; - li(Rd, -step); - mv(Rtmp2, Rs); - - bind(Loop); - addi(Rd, Rd, step); - andi(Rtmp1, Rtmp2, ((1 << step) - 1)); - srli(Rtmp2, Rtmp2, step); - beqz(Rtmp1, Loop); -} - -// This instruction reads adjacent 4 bytes from the lower half of source register, -// inflate into a register, for example: -// Rs: A7A6A5A4A3A2A1A0 -// Rd: 00A300A200A100A0 -void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - li(Rtmp1, 0xFF); - mv(Rd, zr); - for (int i = 0; i <= 3; i++) - { - andr(Rtmp2, Rs, Rtmp1); - if (i) { - slli(Rtmp2, Rtmp2, i * 8); - } - orr(Rd, Rd, Rtmp2); - if (i != 3) { - slli(Rtmp1, Rtmp1, 8); - } - } -} - -// This instruction reads adjacent 4 bytes from the upper half of source register, -// inflate into a register, for example: -// Rs: A7A6A5A4A3A2A1A0 -// Rd: 00A700A600A500A4 -void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) -{ - assert_different_registers(Rd, Rs, Rtmp1, Rtmp2); - li(Rtmp1, 0xFF00000000); - mv(Rd, zr); - for (int i = 0; i <= 3; i++) - { - andr(Rtmp2, Rs, Rtmp1); - orr(Rd, Rd, Rtmp2); - srli(Rd, Rd, 8); - if (i != 3) { - slli(Rtmp1, Rtmp1, 8); - } - } -} + } +} // The size of the blocks erased by the zero_blocks stub. We must // handle anything smaller than this ourselves in zero_words(). @@ -3984,9 +3988,9 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) Label around, done, done16; bltu(cnt, t0, around); { - RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv64::zero_blocks()); + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); - if (StubRoutines::riscv64::complete()) { + if (StubRoutines::riscv::complete()) { address tpc = trampoline_call(zero_blocks); if (tpc == NULL) { DEBUG_ONLY(reset_labels1(around)); @@ -4020,9 +4024,10 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) return pc(); } -// base: Address of a buffer to be zeroed, 8 bytes aligned. -// cnt: Immediate count in HeapWords. 
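// inflate_lo32/inflate_hi32 above widen four Latin-1 bytes from one half of a
// register into four zero-extended 16-bit chars (A3A2A1A0 -> 00A3 00A2 00A1
// 00A0). A scalar C++ equivalent for the low half, as an illustration only:
#include <cstdint>

static uint64_t inflate_lo32_model(uint64_t src) {
  uint64_t dst = 0;
  for (int i = 0; i < 4; i++) {
    uint64_t byte = (src >> (8 * i)) & 0xff;   // pick byte i of the low word
    dst |= byte << (16 * i);                   // place it as a 16-bit char
  }
  return dst;                                  // 0x..A3A2A1A0 -> 0x00A300A200A100A0
}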
#define SmallArraySize (18 * BytesPerLong) + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords. void MacroAssembler::zero_words(Register base, u_int64_t cnt) { assert_different_registers(base, t0, t1); @@ -4035,7 +4040,7 @@ void MacroAssembler::zero_words(Register base, u_int64_t cnt) } } else { const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll - int remainder = cnt % unroll; + int remainder = cnt % unroll; for (int i = 0; i < remainder; i++) { sd(zr, Address(base, i * wordSize)); } @@ -4054,6 +4059,7 @@ void MacroAssembler::zero_words(Register base, u_int64_t cnt) add(loop_base, loop_base, unroll * wordSize); bnez(cnt_reg, loop); } + BLOCK_COMMENT("} zero_words"); } @@ -4094,8 +4100,8 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) andi(t0, cnt, unroll - 1); sub(cnt, cnt, t0); - slli(t1, t0, 3); - add(base, base, t1); // align 8, so first sd n % 8 = mod, next loop sd 8 * n. + // align 8, so first sd n % 8 = mod, next loop sd 8 * n. + shadd(base, t0, base, t1, 3); la(t1, entry); slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) sub(t1, t1, t0); @@ -4114,15 +4120,15 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value) } #define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register temp) { \ +void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ Label L_Okay; \ fscsr(zr); \ FLOATCVT(dst, src); \ - frcsr(temp); \ - andi(temp, temp, 0x1E); \ - beqz(temp, L_Okay); \ - FLOATEQ(temp, src, src); \ - bnez(temp, L_Okay); \ + frcsr(tmp); \ + andi(tmp, tmp, 0x1E); \ + beqz(tmp, L_Okay); \ + FLOATEQ(tmp, src, src); \ + bnez(tmp, L_Okay); \ mv(dst, zr); \ bind(L_Okay); \ } @@ -4172,8 +4178,8 @@ FCMP(double, d); // Zero words; len is in bytes // Destroys all registers except addr // len must be a nonzero multiple of wordSize -void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { - assert_different_registers(addr, len, tmp1, t0, t1); +void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { + assert_different_registers(addr, len, tmp, t0, t1); #ifdef ASSERT { @@ -4218,9 +4224,8 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { srli(len, len, LogBytesPerWord); andi(t0, len, unroll - 1); // t0 = cnt % unroll sub(len, len, t0); // cnt -= unroll - // tmp1 always points to the end of the region we're about to zero - slli(t1, t0, LogBytesPerWord); - add(tmp1, addr, t1); + // tmp always points to the end of the region we're about to zero + shadd(tmp, t0, addr, t1, LogBytesPerWord); la(t1, entry); slli(t0, t0, 2); sub(t1, t1, t0); @@ -4228,16 +4233,29 @@ void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { bind(loop); sub(len, len, unroll); for (int i = -unroll; i < 0; i++) { - Assembler::sd(zr, Address(tmp1, i * wordSize)); + Assembler::sd(zr, Address(tmp, i * wordSize)); } bind(entry); - add(tmp1, tmp1, unroll * wordSize); + add(tmp, tmp, unroll * wordSize); bnez(len, loop); } // shift left by shamt and add // Rd = (Rs1 << shamt) + Rs2 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { + if (UseRVB) { + if (shamt == 1) { + sh1add(Rd, Rs1, Rs2); + return; + } else if (shamt == 2) { + sh2add(Rd, Rs1, Rs2); + return; + } else if (shamt == 3) { + sh3add(Rd, Rs1, Rs2); + return; + } + } + if (shamt 
!= 0) { slli(tmp, Rs1, shamt); add(Rd, Rs2, tmp); @@ -4246,14 +4264,42 @@ void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp } } -void MacroAssembler::zero_ext(Register dst, Register src, int clear_bits) { - slli(dst, src, clear_bits); - srli(dst, dst, clear_bits); +void MacroAssembler::zero_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 16) { + zext_h(dst, src); + return; + } else if (bits == 32) { + zext_w(dst, src); + return; + } + } + + if (bits == 8) { + zext_b(dst, src); + } else { + slli(dst, src, XLEN - bits); + srli(dst, dst, XLEN - bits); + } } -void MacroAssembler::sign_ext(Register dst, Register src, int clear_bits) { - slli(dst, src, clear_bits); - srai(dst, dst, clear_bits); +void MacroAssembler::sign_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 8) { + sext_b(dst, src); + return; + } else if (bits == 16) { + sext_h(dst, src); + return; + } + } + + if (bits == 32) { + sext_w(dst, src); + } else { + slli(dst, src, XLEN - bits); + srai(dst, dst, XLEN - bits); + } } void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) @@ -4284,36 +4330,238 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe bind(done); } -#ifdef COMPILER2 -typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); +void MacroAssembler::safepoint_ifence() { + ifence(); +} -// Search for needle in haystack and return index or -1 -// x10: result -// x11: haystack -// x12: haystack_len -// x13: needle -// x14: needle_len -void MacroAssembler::string_indexof(Register haystack, Register needle, - Register haystack_len, Register needle_len, - Register tmp1, Register tmp2, - Register tmp3, Register tmp4, - Register tmp5, Register tmp6, - Register result, int ae) +#ifdef COMPILER2 +// short string +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL) { - assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + Register ch1 = t0; + Register index = t1; - Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + BLOCK_COMMENT("string_indexof_char_short {"); - Register ch1 = t0; - Register ch2 = t1; - Register nlen_tmp = tmp1; // needle len tmp - Register hlen_tmp = tmp2; // haystack len tmp - Register result_tmp = tmp4; + Label LOOP, LOOP1, LOOP4, LOOP8; + Label MATCH, MATCH1, MATCH2, MATCH3, + MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; - bool isLL = ae == StrIntrinsicNode::LL; + mv(result, -1); + mv(index, zr); + + bind(LOOP); + addi(t0, index, 8); + ble(t0, cnt1, LOOP8); + addi(t0, index, 4); + ble(t0, cnt1, LOOP4); + j(LOOP1); + + bind(LOOP8); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); + beq(ch, ch1, MATCH4); + isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); + beq(ch, ch1, MATCH5); + isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); + beq(ch, ch1, MATCH6); + isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); + beq(ch, ch1, MATCH7); + addi(index, index, 8); + addi(str1, str1, isL ? 
8 : 16); + blt(index, cnt1, LOOP); + j(NOMATCH); - bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; - bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + bind(LOOP4); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + addi(index, index, 4); + addi(str1, str1, isL ? 4 : 8); + bge(index, cnt1, NOMATCH); + + bind(LOOP1); + isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); + beq(ch, ch1, MATCH); + addi(index, index, 1); + addi(str1, str1, isL ? 1 : 2); + blt(index, cnt1, LOOP1); + j(NOMATCH); + + bind(MATCH1); + addi(index, index, 1); + j(MATCH); + + bind(MATCH2); + addi(index, index, 2); + j(MATCH); + + bind(MATCH3); + addi(index, index, 3); + j(MATCH); + + bind(MATCH4); + addi(index, index, 4); + j(MATCH); + + bind(MATCH5); + addi(index, index, 5); + j(MATCH); + + bind(MATCH6); + addi(index, index, 6); + j(MATCH); + + bind(MATCH7); + addi(index, index, 7); + + bind(MATCH); + mv(result, index); + bind(NOMATCH); + BLOCK_COMMENT("} string_indexof_char_short"); +} + +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL) +{ + Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; + Register ch1 = t0; + Register orig_cnt = t1; + Register mask1 = tmp3; + Register mask2 = tmp2; + Register match_mask = tmp1; + Register trailing_char = tmp4; + Register unaligned_elems = tmp4; + + BLOCK_COMMENT("string_indexof_char {"); + beqz(cnt1, NOMATCH); + + addi(t0, cnt1, isL ? -32 : -16); + bgtz(t0, DO_LONG); + string_indexof_char_short(str1, cnt1, ch, result, isL); + j(DONE); + + bind(DO_LONG); + mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { + Label ALIGNED; + andi(unaligned_elems, str1, 0x7); + beqz(unaligned_elems, ALIGNED); + sub(unaligned_elems, unaligned_elems, 8); + neg(unaligned_elems, unaligned_elems); + if (!isL) { + srli(unaligned_elems, unaligned_elems, 1); + } + // do unaligned part per element + string_indexof_char_short(str1, unaligned_elems, ch, result, isL); + bgez(result, DONE); + mv(orig_cnt, cnt1); + sub(cnt1, cnt1, unaligned_elems); + bind(ALIGNED); + } + + // duplicate ch + if (isL) { + slli(ch1, ch, 8); + orr(ch, ch1, ch); + } + slli(ch1, ch, 16); + orr(ch, ch1, ch); + slli(ch1, ch, 32); + orr(ch, ch1, ch); + + if (!isL) { + slli(cnt1, cnt1, 1); + } + + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + mv(mask1, isL ? mask0101 : mask0001); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + mv(mask2, isL ? 
mask7f7f : mask7fff); + + bind(CH1_LOOP); + ld(ch1, Address(str1)); + addi(str1, str1, 8); + addi(cnt1, cnt1, -8); + compute_match_mask(ch1, ch, match_mask, mask1, mask2); + bnez(match_mask, HIT); + bgtz(cnt1, CH1_LOOP); + j(NOMATCH); + + bind(HIT); + ctzc_bit(trailing_char, match_mask, isL, ch1, result); + srli(trailing_char, trailing_char, 3); + addi(cnt1, cnt1, 8); + ble(cnt1, trailing_char, NOMATCH); + // match case + if (!isL) { + srli(cnt1, cnt1, 1); + srli(trailing_char, trailing_char, 1); + } + + sub(result, orig_cnt, cnt1); + add(result, result, trailing_char); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + + bind(DONE); + BLOCK_COMMENT("} string_indexof_char"); +} + +typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + +// Search for needle in haystack and return index or -1 +// x10: result +// x11: haystack +// x12: haystack_len +// x13: needle +// x14: needle_len +void MacroAssembler::string_indexof(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, int ae) +{ + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + + Register ch1 = t0; + Register ch2 = t1; + Register nlen_tmp = tmp1; // needle len tmp + Register hlen_tmp = tmp2; // haystack len tmp + Register result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; int needle_chr_shift = needle_isL ? 0 : 1; int haystack_chr_shift = haystack_isL ? 0 : 1; int needle_chr_size = needle_isL ? 1 : 2; @@ -4459,8 +4707,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern Register orig_haystack = tmp5; mv(orig_haystack, haystack); - slli(haystack_end, result_tmp, haystack_chr_shift); // result_tmp = tmp4 - add(haystack_end, haystack, haystack_end); + // result_tmp = tmp4 + shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 mv(tmp3, needle); @@ -4489,8 +4737,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, sub(ch2, ch2, 1); // for next pattern element, skip distance -1 bgtz(ch2, BCLOOP); - slli(tmp6, needle_len, needle_chr_shift); - add(tmp6, tmp6, needle); // tmp6: pattern end, address after needle + // tmp6: pattern end, address after needle + shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); if (needle_isL == haystack_isL) { // load last 8 bytes (8LL/4UU symbols) ld(tmp6, Address(tmp6, -wordSize)); @@ -4500,10 +4748,10 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d // We'll have to wait until load completed, but it's still faster than per-character loads+checks srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a - slli(ch2, tmp6, registerSize - 24); - srli(ch2, ch2, registerSize - 8); // pattern[m-2], 0x0000000b - slli(ch1, tmp6, registerSize - 16); - srli(ch1, ch1, registerSize - 8); // pattern[m-3], 0x0000000c + slli(ch2, tmp6, XLEN - 24); + srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b + slli(ch1, tmp6, XLEN - 16); + srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d slli(ch2, ch2, 16); orr(ch2, ch2, ch1); // 0x00000b0c @@ -4521,8 +4769,7 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, // move j with bad char offset table bind(BMLOOPSTR2); // compare pattern to source string backward - slli(result, nlen_tmp, haystack_chr_shift); - add(result, haystack, result); + shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); (this->*haystack_load_1chr)(skipch, Address(result), noreg); sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 if (needle_isL == haystack_isL) { @@ -4546,11 +4793,9 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, } bind(BMLOOPSTR1); - slli(ch1, nlen_tmp, needle_chr_shift); - add(ch1, ch1, needle); + shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); (this->*needle_load_1chr)(ch1, Address(ch1), noreg); - slli(ch2, nlen_tmp, haystack_chr_shift); - add(ch2, haystack, ch2); + shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); bind(BMLOOPSTR1_AFTER_LOAD); @@ -4577,8 +4822,8 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, bind(BMADV); sub(nlen_tmp, needle_len, 1); - slli(result, result_tmp, haystack_chr_shift); - add(haystack, haystack, result); // move haystack after bad char skip offset + // move haystack after bad char skip offset + shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ble(haystack, haystack_end, BMLOOPSTR2); add(sp, sp, ASIZE); j(NOMATCH); @@ -4600,13 +4845,13 @@ void MacroAssembler::string_indexof(Register haystack, Register needle, mv(result, zr); RuntimeAddress stub = NULL; if (isLL) { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_ll()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); } else if (needle_isL) { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_ul()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); } else { - stub = RuntimeAddress(StubRoutines::riscv64::string_indexof_linear_uu()); + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); } trampoline_call(stub); @@ -4662,162 +4907,675 @@ void MacroAssembler::string_indexof_linearscan(Register haystack, Register needl load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; - Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + + Register first = tmp3; + + if (needle_con_cnt == -1) { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + + sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); + bltz(t0, DOSHORT); + + (this->*needle_load_1chr)(first, Address(needle), noreg); + slli(t0, needle_len, needle_chr_shift); + add(needle, needle, t0); + neg(nlen_neg, t0); + slli(t0, result_tmp, haystack_chr_shift); + add(haystack, haystack, t0); + neg(hlen_neg, t0); + + bind(FIRST_LOOP); + add(t0, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(t0), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(nlen_tmp, nlen_neg, needle_chr_size); + add(hlen_tmp, hlen_neg, haystack_chr_size); + bgez(nlen_tmp, MATCH); + + bind(STR1_NEXT); + add(ch1, needle, nlen_tmp); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + add(nlen_tmp, nlen_tmp, needle_chr_size); + add(hlen_tmp, hlen_tmp, haystack_chr_size); + bltz(nlen_tmp, STR1_NEXT); + j(MATCH); + + bind(DOSHORT); + if (needle_isL == haystack_isL) { + sub(t0, needle_len, 2); + bltz(t0, DO1); + bgtz(t0, DO3); + } + } + + if (needle_con_cnt == 4) { + Label CH1_LOOP; + (this->*load_4chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 4); + slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_4chr)(ch2, Address(ch2), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { + Label CH1_LOOP; + BLOCK_COMMENT("string_indexof DO2 {"); + bind(DO2); + (this->*load_2chr)(ch1, Address(needle), noreg); + if (needle_con_cnt == 2) { + sub(result_tmp, haystack_len, 2); + } + slli(tmp3, result_tmp, haystack_chr_shift); + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + BLOCK_COMMENT("} string_indexof DO2"); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + BLOCK_COMMENT("string_indexof DO3 {"); + + bind(DO3); + (this->*load_2chr)(first, Address(needle), noreg); + (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); + if (needle_con_cnt == 3) { + sub(result_tmp, haystack_len, 3); + } + slli(hlen_tmp, result_tmp, haystack_chr_shift); + add(haystack, haystack, hlen_tmp); + neg(hlen_neg, hlen_tmp); + + bind(FIRST_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(ch2), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + j(MATCH); + BLOCK_COMMENT("} 
string_indexof DO3"); + } + + if (needle_con_cnt == -1 || needle_con_cnt == 1) { + Label DO1_LOOP; + + BLOCK_COMMENT("string_indexof DO1 {"); + bind(DO1); + (this->*needle_load_1chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 1); + mv(tmp3, result_tmp); + if (haystack_chr_shift) { + slli(tmp3, result_tmp, haystack_chr_shift); + } + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(DO1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, DO1_LOOP); + BLOCK_COMMENT("} string_indexof DO1"); + } + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(MATCH); + srai(t0, hlen_neg, haystack_chr_shift); + add(result, result_tmp, t0); + + bind(DONE); +} + +// Compare strings. +void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, + Register tmp3, int ae) +{ + Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, + DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, + SHORT_LOOP_START, TAIL_CHECK, L; + + const int STUB_THRESHOLD = 64 + 8; + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + // for L strings, 1 byte for 1 character + // for U strings, 2 bytes for 1 character + int str1_chr_size = str1_isL ? 1 : 2; + int str2_chr_size = str2_isL ? 1 : 2; + int minCharsInWord = isLL ? wordSize : wordSize / 2; + + load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_compare {"); + + // Bizzarely, the counts are passed in bytes, regardless of whether they + // are L or U strings, however the result is always in characters. + if (!str1_isL) { + sraiw(cnt1, cnt1, 1); + } + if (!str2_isL) { + sraiw(cnt2, cnt2, 1); + } + + // Compute the minimum of the string lengths and save the difference in result. 
+ sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + // A very short string + li(t0, minCharsInWord); + ble(cnt2, t0, SHORT_STRING); + + // Compare longwords + // load first parts of strings and finish initialization while loading + { + if (str1_isL == str2_isL) { // LL or UU + // load 8 bytes once to compare + ld(tmp1, Address(str1)); + beq(str1, str2, DONE); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + sub(cnt2, cnt2, minCharsInWord); + beqz(cnt2, TAIL_CHECK); + // convert cnt2 from characters to bytes + if (!str1_isL) { + slli(cnt2, cnt2, 1); + } + add(str2, str2, cnt2); + add(str1, str1, cnt2); + sub(cnt2, zr, cnt2); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + add(str1, str1, cnt2); + sub(cnt1, zr, cnt2); + slli(cnt2, cnt2, 1); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 4); + } else { // UL case + ld(tmp1, Address(str1)); + lwu(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + slli(t0, cnt2, 1); + sub(cnt1, zr, t0); + add(str1, str1, t0); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 8); + } + addi(cnt2, cnt2, isUL ? 4 : 8); + bgez(cnt2, TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + + // main loop + bind(NEXT_WORD); + if (str1_isL == str2_isL) { // LL or UU + add(t0, str1, cnt2); + ld(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt2, cnt2, 8); + } else if (isLU) { // LU case + add(t0, str1, cnt1); + lwu(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt1, cnt1, 4); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + addi(cnt2, cnt2, 8); + } else { // UL case + add(t0, str2, cnt2); + lwu(tmp2, Address(t0)); + add(t0, str1, cnt1); + ld(tmp1, Address(t0)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + addi(cnt1, cnt1, 8); + addi(cnt2, cnt2, 4); + } + bgez(cnt2, TAIL); + + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, NEXT_WORD); + j(DIFFERENCE); + bind(TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + if (str1_isL == str2_isL) { // LL or UU + ld(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + } else { // UL case + lwu(tmp2, Address(str2)); + ld(tmp1, Address(str1)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + } + bind(TAIL_CHECK); + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, DONE); + + // Find the first different characters in the longwords and + // compute their difference. 
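// In effect (illustrative sketch), the DIFFERENCE code below does, given
// tmp3 = tmp1 ^ tmp2 != 0:
//   shift = trailing_zeros(tmp3) aligned down to a character boundary;
//   c1 = (tmp1 >> shift) & mask;    // mask = 0xFF for Latin1, 0xFFFF for UTF-16
//   c2 = (tmp2 >> shift) & mask;
//   result = c1 - c2;
// ctzc_bit() supplies the character-aligned trailing-zero count.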
+ bind(DIFFERENCE); + ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb + srl(tmp1, tmp1, result); + srl(tmp2, tmp2, result); + if (isLL) { + andi(tmp1, tmp1, 0xFF); + andi(tmp2, tmp2, 0xFF); + } else { + andi(tmp1, tmp1, 0xFFFF); + andi(tmp2, tmp2, 0xFFFF); + } + sub(result, tmp1, tmp2); + j(DONE); + } + + bind(STUB); + RuntimeAddress stub = NULL; + switch (ae) { + case StrIntrinsicNode::LL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); + break; + case StrIntrinsicNode::UU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); + break; + case StrIntrinsicNode::LU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); + break; + case StrIntrinsicNode::UL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); + break; + default: + ShouldNotReachHere(); + } + assert(stub.target() != NULL, "compare_long_string stub has not been generated"); + trampoline_call(stub); + j(DONE); + + bind(SHORT_STRING); + // Is the minimum length zero? + beqz(cnt2, DONE); + // arrange code to do most branches while loading and loading next characters + // while comparing previous + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + j(SHORT_LOOP_START); + bind(SHORT_LOOP); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST); + bind(SHORT_LOOP_START); + (this->*str1_load_chr)(tmp2, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(t0, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bne(tmp1, cnt1, SHORT_LOOP_TAIL); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST2); + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + beq(tmp2, t0, SHORT_LOOP); + sub(result, tmp2, t0); + j(DONE); + bind(SHORT_LOOP_TAIL); + sub(result, tmp1, cnt1); + j(DONE); + bind(SHORT_LAST2); + beq(tmp2, t0, DONE); + sub(result, tmp2, t0); + + j(DONE); + bind(SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bind(SHORT_LAST); + beq(tmp1, cnt1, DONE); + sub(result, tmp1, cnt1); + + bind(DONE); + + BLOCK_COMMENT("} string_compare"); +} + +void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register result, + Register cnt1, int elem_size) { + Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; // cnt2 only used in array length compare + Register elem_per_word = tmp6; + int log_elem_size = exact_log2(elem_size); + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); + + assert(elem_size == 1 || elem_size == 2, "must be char or byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); + li(elem_per_word, wordSize / elem_size); + + BLOCK_COMMENT("arrays_equals {"); + + // if (a1 == a2), return true + beq(a1, a2, SAME); + + mv(result, false); + beqz(a1, DONE); + beqz(a2, DONE); + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt2, cnt1, DONE); + beqz(cnt1, SAME); + + slli(tmp5, cnt1, 3 + log_elem_size); + sub(tmp5, zr, tmp5); + add(a1, a1, base_offset); + add(a2, a2, base_offset); + ld(tmp3, Address(a1, 0)); + ld(tmp4, Address(a2, 0)); + ble(cnt1, elem_per_word, SHORT); // short or same + + // Main 16 byte comparison loop with 2 exits + bind(NEXT_DWORD); { + ld(tmp1, Address(a1, wordSize)); + ld(tmp2, Address(a2, wordSize)); + sub(cnt1, cnt1, 2 * wordSize / elem_size); + blez(cnt1, TAIL); + bne(tmp3, tmp4, DONE); + ld(tmp3, Address(a1, 2 * wordSize)); + ld(tmp4, Address(a2, 2 * wordSize)); + add(a1, a1, 2 * wordSize); + add(a2, a2, 2 * wordSize); + ble(cnt1, elem_per_word, TAIL2); + } beq(tmp1, tmp2, NEXT_DWORD); + j(DONE); + + bind(TAIL); + xorr(tmp4, tmp3, tmp4); + xorr(tmp2, tmp1, tmp2); + sll(tmp2, tmp2, tmp5); + orr(tmp5, tmp4, tmp2); + j(IS_TMP5_ZR); + + bind(TAIL2); + bne(tmp1, tmp2, DONE); + + bind(SHORT); + xorr(tmp4, tmp3, tmp4); + sll(tmp5, tmp4, tmp5); + + bind(IS_TMP5_ZR); + bnez(tmp5, DONE); + + bind(SAME); + mv(result, true); + // That's it. + bind(DONE); + + BLOCK_COMMENT("} array_equals"); +} + +// Compare Strings - Register first = tmp3; +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. +// elem_size is the element size in bytes: either 1 or 2. +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. For strings < 8 bytes, we compare a +// halfword, then a short, and then a byte. - if (needle_con_cnt == -1) { - Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; +void MacroAssembler::string_equals(Register a1, Register a2, + Register result, Register cnt1, int elem_size) +{ + Label SAME, DONE, SHORT, NEXT_WORD; + Register tmp1 = t0; + Register tmp2 = t1; - sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); - bltz(t0, DOSHORT); + assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1); - (this->*needle_load_1chr)(first, Address(needle), noreg); - slli(t0, needle_len, needle_chr_shift); - add(needle, needle, t0); - neg(nlen_neg, t0); - slli(t0, result_tmp, haystack_chr_shift); - add(haystack, haystack, t0); - neg(hlen_neg, t0); + BLOCK_COMMENT("string_equals {"); - bind(FIRST_LOOP); - add(t0, haystack, hlen_neg); - (this->*haystack_load_1chr)(ch2, Address(t0), noreg); - beq(first, ch2, STR1_LOOP); + mv(result, false); - bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, FIRST_LOOP); - j(NOMATCH); + // Check for short strings, i.e. smaller than wordSize. + sub(cnt1, cnt1, wordSize); + bltz(cnt1, SHORT); - bind(STR1_LOOP); - add(nlen_tmp, nlen_neg, needle_chr_size); - add(hlen_tmp, hlen_neg, haystack_chr_size); - bgez(nlen_tmp, MATCH); + // Main 8 byte comparison loop. 
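// Sketch of the idea: cnt1 was biased by -wordSize above, so the loop below compares
// one 8-byte word per iteration while cnt1 > 0. The epilogue then reloads the words at
// (a1 + cnt1) and (a2 + cnt1); these may overlap bytes already compared, but re-checking
// a few bytes is cheaper than another branch and is safe because this path only runs
// when both strings contain at least wordSize bytes.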
+ bind(NEXT_WORD); { + ld(tmp1, Address(a1, 0)); + add(a1, a1, wordSize); + ld(tmp2, Address(a2, 0)); + add(a2, a2, wordSize); + sub(cnt1, cnt1, wordSize); + bne(tmp1, tmp2, DONE); + } bgtz(cnt1, NEXT_WORD); - bind(STR1_NEXT); - add(ch1, needle, nlen_tmp); - (this->*needle_load_1chr)(ch1, Address(ch1), noreg); - add(ch2, haystack, hlen_tmp); - (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); - bne(ch1, ch2, STR2_NEXT); - add(nlen_tmp, nlen_tmp, needle_chr_size); - add(hlen_tmp, hlen_tmp, haystack_chr_size); - bltz(nlen_tmp, STR1_NEXT); - j(MATCH); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when + // length == 4. + add(tmp1, a1, cnt1); + ld(tmp1, Address(tmp1, 0)); + add(tmp2, a2, cnt1); + ld(tmp2, Address(tmp2, 0)); + bne(tmp1, tmp2, DONE); + j(SAME); - bind(DOSHORT); - if (needle_isL == haystack_isL) { - sub(t0, needle_len, 2); - bltz(t0, DO1); - bgtz(t0, DO3); - } - } + bind(SHORT); + Label TAIL03, TAIL01; - if (needle_con_cnt == 4) { - Label CH1_LOOP; - (this->*load_4chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 4); - slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); + // 0-7 bytes left. + andi(t0, cnt1, 4); + beqz(t0, TAIL03); + { + lwu(tmp1, Address(a1, 0)); + add(a1, a1, 4); + lwu(tmp2, Address(a2, 0)); + add(a2, a2, 4); + bne(tmp1, tmp2, DONE); + } - bind(CH1_LOOP); - add(ch2, haystack, hlen_neg); - (this->*load_4chr)(ch2, Address(ch2), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, CH1_LOOP); - j(NOMATCH); + bind(TAIL03); + // 0-3 bytes left. + andi(t0, cnt1, 2); + beqz(t0, TAIL01); + { + lhu(tmp1, Address(a1, 0)); + add(a1, a1, 2); + lhu(tmp2, Address(a2, 0)); + add(a2, a2, 2); + bne(tmp1, tmp2, DONE); } - if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { - Label CH1_LOOP; - BLOCK_COMMENT("string_indexof DO2 {"); - bind(DO2); - (this->*load_2chr)(ch1, Address(needle), noreg); - if (needle_con_cnt == 2) { - sub(result_tmp, haystack_len, 2); + bind(TAIL01); + if (elem_size == 1) { // Only needed when comparing 1-byte elements + // 0-1 bytes left. + andi(t0, cnt1, 1); + beqz(t0, SAME); + { + lbu(tmp1, a1, 0); + lbu(tmp2, a2, 0); + bne(tmp1, tmp2, DONE); } - slli(tmp3, result_tmp, haystack_chr_shift); - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); - - bind(CH1_LOOP); - add(tmp3, haystack, hlen_neg); - (this->*load_2chr)(ch2, Address(tmp3), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, CH1_LOOP); - j(NOMATCH); - BLOCK_COMMENT("} string_indexof DO2"); } - if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { - Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; - BLOCK_COMMENT("string_indexof DO3 {"); + // Arrays are equal. + bind(SAME); + mv(result, true); - bind(DO3); - (this->*load_2chr)(first, Address(needle), noreg); - (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); - if (needle_con_cnt == 3) { - sub(result_tmp, haystack_len, 3); - } - slli(hlen_tmp, result_tmp, haystack_chr_shift); - add(haystack, haystack, hlen_tmp); - neg(hlen_neg, hlen_tmp); + // That's it. 
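// Summary of the SHORT path above (sketch): subtracting wordSize left bits 0..2 of cnt1
// intact, so testing them decomposes the remaining 0..7 bytes into an optional 4-byte,
// then 2-byte, then (for byte elements only) 1-byte comparison before falling through
// to SAME.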
+ bind(DONE); + BLOCK_COMMENT("} string_equals"); +} - bind(FIRST_LOOP); - add(ch2, haystack, hlen_neg); - (this->*load_2chr)(ch2, Address(ch2), noreg); - beq(first, ch2, STR1_LOOP); +typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered); - bind(STR2_NEXT); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, FIRST_LOOP); - j(NOMATCH); +static conditional_branch_insn conditional_branches[] = +{ + /* SHORT branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgt, + NULL, // BoolTest::overflow + (conditional_branch_insn)&Assembler::blt, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::ble, + NULL, // BoolTest::no_overflow + (conditional_branch_insn)&Assembler::bge, - bind(STR1_LOOP); - add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); - add(ch2, haystack, hlen_tmp); - (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); - bne(ch1, ch2, STR2_NEXT); - j(MATCH); - BLOCK_COMMENT("} string_indexof DO3"); - } + /* UNSIGNED branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgtu, + NULL, + (conditional_branch_insn)&Assembler::bltu, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bleu, + NULL, + (conditional_branch_insn)&Assembler::bgeu +}; - if (needle_con_cnt == -1 || needle_con_cnt == 1) { - Label DO1_LOOP; +static float_conditional_branch_insn float_conditional_branches[] = +{ + /* FLOAT SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::float_beq, + (float_conditional_branch_insn)&MacroAssembler::float_bgt, + NULL, // BoolTest::overflow + (float_conditional_branch_insn)&MacroAssembler::float_blt, + (float_conditional_branch_insn)&MacroAssembler::float_bne, + (float_conditional_branch_insn)&MacroAssembler::float_ble, + NULL, // BoolTest::no_overflow + (float_conditional_branch_insn)&MacroAssembler::float_bge, - BLOCK_COMMENT("string_indexof DO1 {"); - bind(DO1); - (this->*needle_load_1chr)(ch1, Address(needle), noreg); - sub(result_tmp, haystack_len, 1); - mv(tmp3, result_tmp); - if (haystack_chr_shift) { - slli(tmp3, result_tmp, haystack_chr_shift); - } - add(haystack, haystack, tmp3); - neg(hlen_neg, tmp3); + /* DOUBLE SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::double_beq, + (float_conditional_branch_insn)&MacroAssembler::double_bgt, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_blt, + (float_conditional_branch_insn)&MacroAssembler::double_bne, + (float_conditional_branch_insn)&MacroAssembler::double_ble, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_bge +}; - bind(DO1_LOOP); - add(tmp3, haystack, hlen_neg); - (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); - beq(ch1, ch2, MATCH); - add(hlen_neg, hlen_neg, haystack_chr_size); - blez(hlen_neg, DO1_LOOP); - BLOCK_COMMENT("} string_indexof DO1"); - } +void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), + "invalid conditional branch index"); + (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} - bind(NOMATCH); - mv(result, -1); - j(DONE); +// This is a function should only be used by C2. 
Flip the unordered when unordered-greater, C2 would use +// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). +void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), + "invalid float conditional branch index"); + int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); + (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, + (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +} - bind(MATCH); - srai(t0, hlen_neg, haystack_chr_shift); - add(result, result_tmp, t0); +void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + case BoolTest::le: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + case BoolTest::gt: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} - bind(DONE); +void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } } void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { @@ -5140,3 +5898,4 @@ void MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, } #endif // COMPILER2 + diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index e1474e49276..cb62bb0d875 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP #include "asm/assembler.hpp" +#include "metaprogramming/enableIf.hpp" // MacroAssembler extends Assembler by frequently used macros. // @@ -44,21 +45,52 @@ class MacroAssembler: public Assembler { void safepoint_poll(Label& slow_path); void safepoint_poll_acquire(Label& slow_path); + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. 
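// A typical caller-side shape (illustrative sketch; register and label names are
// placeholders, not part of this patch):
//   biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg,
//                        /*swap_reg_contains_mark*/ false, done, &slow_case);
//   ... CAS-based stack-lock fast path ...
//   bind(done);
// and the unlock path calls biased_locking_exit() before its displaced-header check.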
+ int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL, + Register flag = noreg); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); + + // Helper functions for statistics gathering. + // Unconditional atomic increment. + void atomic_incw(Register counter_addr, Register tmp); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + la(tmp1, counter_addr); + atomic_incw(tmp1, tmp2); + } + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + // Alignment - void align(int modulus); + void align(int modulus, int extra_offset = 0); // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP + // because signal based thread suspend/resume could happen asynchronously. void enter() { addi(sp, sp, - 2 * wordSize); - sd(lr, Address(sp, wordSize)); + sd(ra, Address(sp, wordSize)); sd(fp, Address(sp)); - mv(fp, sp); + addi(fp, sp, 2 * wordSize); } void leave() { - mv(sp, fp); + addi(sp, fp, - 2 * wordSize); ld(fp, Address(sp)); - ld(lr, Address(sp, wordSize)); + ld(ra, Address(sp, wordSize)); addi(sp, sp, 2 * wordSize); } @@ -133,9 +165,9 @@ class MacroAssembler: public Assembler { void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); // last Java Frame (fills frame anchor) - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp); - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register temp); - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc,Register temp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); // thread in the default location (xthread) void reset_last_Java_frame(bool clear_fp); @@ -216,6 +248,8 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. 
void store_heap_oop_null(Address dst); + void load_prototype_header(Register dst, Register src); + // This dummy is to prevent a call to store_heap_oop from // converting a zero (linke NULL) into a Register by giving // the compiler two choices it can't resolve @@ -231,6 +265,7 @@ class MacroAssembler: public Assembler { virtual void null_check(Register reg, int offset = -1); static bool needs_explicit_null_check(intptr_t offset); + static bool uses_implicit_null_check(void* address); // idiv variant which deals with MINLONG as dividend and -1 as divisor int corrected_idivl(Register result, Register rs1, Register rs2, @@ -243,7 +278,7 @@ class MacroAssembler: public Assembler { Register intf_klass, RegisterOrConstant itable_index, Register method_result, - Register scan_temp, + Register scan_tmp, Label& no_such_interface, bool return_method = true); @@ -260,21 +295,22 @@ class MacroAssembler: public Assembler { Address form_address(Register Rd, Register base, long byte_offset); // allocation - void eden_allocate( + void tlab_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register tmp1, // temp register - Label& slow_case, // continuation point if fast allocation fails + Register tmp2, // temp register + Label& slow_case, // continuation point of fast allocation fails bool is_far = false ); - void tlab_allocate( + + void eden_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time - Register tmp1, // temp register - Register tmp2, // temp register - Label& slow_case, // continuation point of fast allocation fails + Register tmp, // temp register + Label& slow_case, // continuation point if fast allocation fails bool is_far = false ); @@ -283,10 +319,10 @@ class MacroAssembler: public Assembler { // The fast path produces a tri-state answer: yes / no / maybe-slow. // One of the three labels can be NULL, meaning take the fall-through. // If super_check_offset is -1, the value is loaded up from super_klass. - // No registers are killed, except temp_reg + // No registers are killed, except tmp_reg void check_klass_subtype_fast_path(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label* L_success, Label* L_failure, Label* L_slow_path, @@ -294,18 +330,18 @@ class MacroAssembler: public Assembler { // The reset of the type cehck; must be wired to a corresponding fast path. // It does not repeat the fast path logic, so don't use it standalone. - // The temp_reg and temp2_reg can be noreg, if no temps are avaliable. + // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. // Updates the sub's secondary super cache as necessary. 
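// Intended pairing of the two routines (illustrative sketch; labels are placeholders
// and the trailing super_check_offset argument is left at its default):
//   Label ok, fail, slow;
//   check_klass_subtype_fast_path(sub, super, tmp, &ok, &fail, &slow);
//   bind(slow);
//   check_klass_subtype_slow_path(sub, super, tmp1, tmp2, &ok, &fail);
// The fast path consults the super type display and cache; the slow path scans the
// secondary-supers array (see repne_scan below) and caches a hit for later queries.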
void check_klass_subtype_slow_path(Register sub_klass, Register super_klass, - Register temp_reg, - Register temp2_reg, + Register tmp1_reg, + Register tmp2_reg, Label* L_success, Label* L_failure); void check_klass_subtype(Register sub_klass, Register super_klass, - Register temp_reg, + Register tmp_reg, Label& L_success); Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); @@ -386,17 +422,11 @@ class MacroAssembler: public Assembler { void should_not_reach_here() { stop("should not reach here"); } - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - return RegisterOrConstant(tmp); - } - static address target_addr_for_insn(address insn_addr); // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined. - static int pd_patch_instruction_size(address branch, address target) ; + static int pd_patch_instruction_size(address branch, address target); static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { pd_patch_instruction_size(branch, target); } @@ -423,16 +453,14 @@ class MacroAssembler: public Assembler { void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); public: - // enum used for riscv64--x86 linkage to define return type of x86 function - enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; - // Standard pseudoinstruction void nop(); - void mv(Register Rd, Register Rs) ; + void mv(Register Rd, Register Rs); void notr(Register Rd, Register Rs); void neg(Register Rd, Register Rs); void negw(Register Rd, Register Rs); - void sext_w(Register Rd, Register Rs); // mv Rd[31:0], Rs[31:0] + void sext_w(Register Rd, Register Rs); + void zext_b(Register Rd, Register Rs); void seqz(Register Rd, Register Rs); // set if = zero void snez(Register Rd, Register Rs); // set if != zero void sltz(Register Rd, Register Rs); // set if < zero @@ -508,8 +536,6 @@ class MacroAssembler: public Assembler { void pop_reg(Register Rd); int push_reg(unsigned int bitset, Register stack); int pop_reg(unsigned int bitset, Register stack); - void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } - void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } // Push and pop everything that might be clobbered by a native // runtime call except t0 and t1. 
(They are always @@ -540,11 +566,17 @@ class MacroAssembler: public Assembler { } // mv - void mv(Register Rd, int64_t imm64); - void mv(Register Rd, int imm); - void mvw(Register Rd, int32_t imm32); + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + void mv(Register Rd, Address dest); - void mv(Register Rd, address addr); + void mv(Register Rd, address dest); void mv(Register Rd, RegisterOrConstant src); // logic @@ -552,28 +584,22 @@ class MacroAssembler: public Assembler { void orrw(Register Rd, Register Rs1, Register Rs2); void xorrw(Register Rd, Register Rs1, Register Rs2); - // grev - void reverseb16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 16-bit and move to lower - void reverseh32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse half-words in 32-bit and move to lower - void grevh(Register Rd, Register Rs, Register Rtmp = t0); // basic reverse bytes in 16-bit halfwords, sign-extend - void grev16w(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 16-bit halfwords(32), sign-extend - void grevw(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes(32), sign-extend - void grev16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 16-bit halfwords - void grev32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2= t1); // reverse bytes in 32-bit words - void grev(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 64-bit double-words - void grevhu(Register Rd, Register Rs, Register Rtmp = t0); // basic reverse bytes in 16-bit halfwords, zero-extend - void grev16wu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 16-bit halfwords(32), zero-extend - void grevwu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes(32), zero-extend - - - void andi(Register Rd, Register Rn, int64_t increment, Register temp = t0); + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend + void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend + void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend + void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower + void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + + void ror_imm(Register dst, 
Register src, uint32_t shift, Register tmp = t0); + void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); - // Support for serializing memory accesses between threads - void serialize_memory(Register thread, Register tmp1, Register tmp2); - void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); - void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); void cmpxchg(Register addr, Register expected, Register new_val, enum operand_size size, @@ -613,35 +639,10 @@ class MacroAssembler: public Assembler { void atomic_xchgwu(Register prev, Register newv, Register addr); void atomic_xchgalwu(Register prev, Register newv, Register addr); - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg is killed. - // tmp_reg must be supplied and must not be t0 or t1 - // Optional slow case is for implementations (interpreter and C1) which branch to - // slow case directly. Leaves condition codes set for C2's Fast_Lock node. - // Returns offset of first potentially-faulting instruction for null - // check info (currently consumed only by C1). If - // swap_reg_contains_mark is true then returns -1 as it is assumed - // the calling code has already passed any potential faults. - int biased_locking_enter(Register lock_reg, Register obj_reg, - Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL, - Register flag = noreg); - void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); - static bool far_branches() { return ReservedCodeCacheSize > branch_range; } - //atomic - void atomic_incw(Register counter_addr, Register tmp1); - void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { - la(tmp1, counter_addr); - atomic_incw(tmp1, tmp2); - } - // Jumps that can reach anywhere in the code cache. // Trashes tmp. void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); @@ -660,8 +661,8 @@ class MacroAssembler: public Assembler { void bang_stack_with_offset(int offset) { // stack grows down, caller passes positive offset assert(offset > 0, "must bang with negative offset"); - sub(t1, sp, offset); - sd(zr, Address(t1)); + sub(t0, sp, offset); + sd(zr, Address(t0)); } void la_patchable(Register reg1, const Address &dest, int32_t &offset); @@ -672,53 +673,19 @@ class MacroAssembler: public Assembler { #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) -#ifdef COMPILER2 - void spill(Register r, bool is64, int offset) { - is64 ? sd(r, Address(sp, offset)) - : sw(r, Address(sp, offset)); - } - - void spill(FloatRegister f, bool is64, int offset) { - is64 ? fsd(f, Address(sp, offset)) - : fsw(f, Address(sp, offset)); - } - - void unspill(Register r, bool is64, int offset) { - is64 ? ld(r, Address(sp, offset)) - : lw(r, Address(sp, offset)); - } - - void unspillu(Register r, bool is64, int offset) { - is64 ? ld(r, Address(sp, offset)) - : lwu(r, Address(sp, offset)); - } - - void unspill(FloatRegister f, bool is64, int offset) { - is64 ? 
fld(f, Address(sp, offset)) - : flw(f, Address(sp, offset)); - } -#endif // COMPILER2 - - void clear_upper_bits(Register r, unsigned upper_bits) { - assert(upper_bits < 64, "bit count to clear must be less than 64"); - - int sig_bits = 64 - upper_bits; // significance bits - if (sig_bits < 12) { - andi(r, r, (1UL << sig_bits) - 1); - } else { - zero_ext(r, r, upper_bits); - } - } - // Frame creation and destruction shared between JITs. void build_frame(int framesize); void remove_frame(int framesize); void reserved_stack_check(); + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); - address read_polling_page(Register r, address page, relocInfo::relocType rtype); - address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); address ic_call(address entry, jint method_index = 0); @@ -728,79 +695,61 @@ class MacroAssembler: public Assembler { void cmpptr(Register src1, Address src2, Label& equal); - void load_method_holder(Register holder, Register method); - - void oop_beq(Register obj1, Register obj2, Label& L_equal, bool is_far = false); - void oop_bne(Register obj1, Register obj2, Label& L_nequal, bool is_far = false); - -#ifdef COMPILER2 - void arrays_equals(Register r1, Register r2, - Register tmp3, Register tmp4, - Register tmp5, Register tmp6, - Register result, Register cnt1, - int elem_size); - - void string_equals(Register r1, Register r2, - Register result, Register cnt1, - int elem_size); - - void string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - Register tmp1, Register tmp2, Register tmp3, - int ae); -#endif - - void compute_index(Register str1, Register trailing_zero, Register match_mask, + void compute_index(Register str1, Register trailing_zeros, Register match_mask, Register result, Register char_tmp, Register tmp, bool haystack_isL); void compute_match_mask(Register src, Register pattern, Register match_mask, Register mask1, Register mask2); - void inflate_lo32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); - void inflate_hi32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); +#ifdef COMPILER2 + void mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, 
Register tmp4, + Register tmp6, Register product_hi); + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); +#endif + + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); - void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register Rtmp1 = t0, Register Rtmp2 = t1); + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); void zero_words(Register base, u_int64_t cnt); address zero_words(Register ptr, Register cnt); void fill_words(Register base, Register cnt, Register value); - void zero_memory(Register addr, Register len, Register tmp1); + void zero_memory(Register addr, Register len, Register tmp); // shift left by shamt and add void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); -#ifdef COMPILER2 - // refer to conditional_branches and float_conditional_branches - static const int bool_test_bits = 3; - static const int neg_cond_bits = 2; - static const int unsigned_branch_mask = 1 << bool_test_bits; - static const int double_branch_mask = 1 << bool_test_bits; - - // cmp - void cmp_branch(int cmpFlag, - Register op1, Register op2, - Label& label, bool is_far = false); - - void float_cmp_branch(int cmpFlag, - FloatRegister op1, FloatRegister op2, - Label& label, bool is_far = false); - - void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, - Label& L, bool is_far = false); - - void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, - Label& L, bool is_far = false); - -#endif - // Here the float instructions with safe deal with some exceptions. // e.g. convert from NaN, +Inf, -Inf to int, float, double // will trigger exception, we need to deal with these situations // to get correct results. 
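// The *_safe converters are generated by the FCVT_SAFE macro in macroAssembler_riscv.cpp
// (see earlier in this patch). In rough C terms each one does:
//   fcsr = 0;  dst = convert(src);  flags = fcsr & 0x1E;   // NV/DZ/OF/UF, inexact ignored
//   if (flags != 0 && src != src) dst = 0;                 // NaN must convert to 0 in Java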
- void fcvt_w_s_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_l_s_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_w_d_safe(Register dst, FloatRegister src, Register temp = t0); - void fcvt_l_d_safe(Register dst, FloatRegister src, Register temp = t0); + void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); + void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); // vector load/store unit-stride instructions void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { @@ -847,13 +796,13 @@ class MacroAssembler: public Assembler { sltu(Rt, zr, Rt); break; case T_CHAR : - zero_ext(Rt, Rt, registerSize - 16); + zero_extend(Rt, Rt, 16); break; case T_BYTE : - sign_ext(Rt, Rt, registerSize - 8); + sign_extend(Rt, Rt, 8); break; case T_SHORT : - sign_ext(Rt, Rt, registerSize - 16); + sign_extend(Rt, Rt, 16); break; case T_INT : addw(Rt, Rt, zr); @@ -871,8 +820,8 @@ class MacroAssembler: public Assembler { void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); // Zero/Sign-extend - void zero_ext(Register dst, Register src, int clear_bits); - void sign_ext(Register dst, Register src, int clear_bits); + void zero_extend(Register dst, Register src, int bits); + void sign_extend(Register dst, Register src, int bits); // compare src1 and src2 and get -1/0/1 in dst. // if [src1 > src2], dst = 1; @@ -888,7 +837,57 @@ class MacroAssembler: public Assembler { void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); void vfneg_v(VectorRegister vd, VectorRegister vs); -#ifdef COMPILER2 +private: + +#ifdef ASSERT + // Macro short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). +#define reset_labels1(L1) L1.reset() +#define reset_labels2(L1, L2) L1.reset(); L2.reset() +#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) +#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) +#endif + void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space. 
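  // Editorial note on the predicate below: the shift accepts exactly those
  // addresses whose bits 63..47 are all zero, i.e. the lower half of the sv48
  // range that needs no sign-extension. For example:
  //   (uintptr_t)0x00007fffffffffff >> 47 == 0   -> accepted
  //   (uintptr_t)0x0000800000000000 >> 47 == 1   -> rejected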
+ bool is_valid_riscv64_address(address addr) { + // sv48: must have bits 63–48 all equal to bit 47 + return ((uintptr_t)addr >> 47) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { + if (NearCpool) { + ld(dest, const_addr); + } else { + int32_t offset = 0; + la_patchable(dest, InternalAddress(const_addr.target()), offset); + ld(dest, Address(dest, offset)); + } + } + + int bitset_to_regs(unsigned int bitset, unsigned char* regs); + Address add_memory_helper(const Address dst); + + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +public: + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1, Register tmp2, Register tmp3, + int ae); + + void string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL); + + void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL); + void string_indexof(Register str1, Register str2, Register cnt1, Register cnt2, Register tmp1, Register tmp2, @@ -902,20 +901,71 @@ class MacroAssembler: public Assembler { Register tmp3, Register tmp4, int needle_con_cnt, Register result, int ae); + void arrays_equals(Register r1, Register r2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, Register cnt1, + int elem_size); + + void string_equals(Register r1, Register r2, + Register result, Register cnt1, + int elem_size); + + // refer to conditional_branches and float_conditional_branches + static const int bool_test_bits = 3; + static const int neg_cond_bits = 2; + static const int unsigned_branch_mask = 1 << bool_test_bits; + static const int double_branch_mask = 1 << bool_test_bits; + + // cmp + void cmp_branch(int cmpFlag, + Register op1, Register op2, + Label& label, bool is_far = false); + + void float_cmp_branch(int cmpFlag, + FloatRegister op1, FloatRegister op2, + Label& label, bool is_far = false); + + void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); + + void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); - void minmax_FD(FloatRegister dst, - FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); + void spill(Register r, bool is64, int offset) { + is64 ? sd(r, Address(sp, offset)) + : sw(r, Address(sp, offset)); + } + + void spill(FloatRegister f, bool is64, int offset) { + is64 ? fsd(f, Address(sp, offset)) + : fsw(f, Address(sp, offset)); + } void spill(VectorRegister v, int offset) { add(t0, sp, offset); vs1r_v(v, t0); } + void unspill(Register r, bool is64, int offset) { + is64 ? ld(r, Address(sp, offset)) + : lw(r, Address(sp, offset)); + } + + void unspillu(Register r, bool is64, int offset) { + is64 ? ld(r, Address(sp, offset)) + : lwu(r, Address(sp, offset)); + } + + void unspill(FloatRegister f, bool is64, int offset) { + is64 ? 
fld(f, Address(sp, offset)) + : flw(f, Address(sp, offset)); + } + void unspill(VectorRegister v, int offset) { add(t0, sp, offset); vl1r_v(v, t0); @@ -927,6 +977,10 @@ class MacroAssembler: public Assembler { spill(v0, dst_offset); } + void minmax_FD(FloatRegister dst, + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + private: void element_compare(Register r1, Register r2, Register result, Register cnt, @@ -972,45 +1026,6 @@ class MacroAssembler: public Assembler { VectorRegister tmp1, VectorRegister tmp2, bool is_double, bool is_min); -#endif // COMPILER2 - -private: - -#ifdef ASSERT - // Template short-hand support to clean-up after a failed call to trampoline - // call generation (see trampoline_call() below), when a set of Labels must - // be reset (before returning). - #define reset_labels1(L1) L1.reset() - #define reset_labels2(L1, L2) L1.reset(); L2.reset() - #define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) - #define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) -#endif - void load_prototype_header(Register dst, Register src); - void repne_scan(Register addr, Register value, Register count, Register temp); - - // Return true if an addres is within the 48-bit Riscv64 address - // space. - bool is_valid_riscv64_address(address addr) { - // sv48: must have bits 63–48 all equal to bit 47 - return ((uintptr_t)addr >> 47) == 0; - } - - void ld_constant(Register dest, const Address &const_addr) { - if (NearCpool) { - ld(dest, const_addr); - } else { - int32_t offset = 0; - la_patchable(dest, InternalAddress(const_addr.target()), offset); - ld(dest, Address(dest, offset)); - } - } - - int bitset_to_fregs(unsigned int bitset, unsigned char* regs); - int bitset_to_regs(unsigned int bitset, unsigned char* regs); - Address add_memory_helper(const Address dst); - - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); }; #ifdef ASSERT @@ -1034,4 +1049,5 @@ class SkipIfEqual { SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ~SkipIfEqual(); }; + #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp index ced89faef1d..ef968ccd96d 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,4 +26,6 @@ #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP #define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +// Still empty. + #endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp deleted file mode 100644 index e32cce0a32a..00000000000 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Huawei Technologies Co., Ltd. 
All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_RISCV_MATCHER_RISCV_HPP -#define CPU_RISCV_MATCHER_RISCV_HPP - - // Defined within class Matcher - - // false => size gets scaled to BytesPerLong, ok. - static const bool init_array_count_is_in_bytes = false; - - // riscv64 supports misaligned vectors store/load. - static constexpr bool misaligned_vectors_ok() { - return false; - } - - // Whether code generation need accurate ConvI2L types. - static const bool convi2l_type_required = false; - - // Does the CPU require late expand (see block.cpp for description of late expand)? - static const bool require_postalloc_expand = false; - - // Do we need to mask the count passed to shift instructions or does - // the cpu only look at the lower 5/6 bits anyway? - static const bool need_masked_shift_count = false; - - // No support for generic vector operands. - static const bool supports_generic_vector_operands = false; - - static constexpr bool isSimpleConstant64(jlong value) { - // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. - // Probably always true, even if a temp register is required. - return true; - } - - // Use conditional move (CMOVL) - static constexpr int long_cmove_cost() { - // long cmoves are no more expensive than int cmoves - return 0; - } - - static constexpr int float_cmove_cost() { - // float cmoves are no more expensive than int cmoves - return 0; - } - - // This affects two different things: - // - how Decode nodes are matched - // - how ImplicitNullCheck opportunities are recognized - // If true, the matcher will try to remove all Decodes and match them - // (as operands) into nodes. NullChecks are not prepared to deal with - // Decodes by final_graph_reshaping(). - // If false, final_graph_reshaping() forces the decode behind the Cmp - // for a NullCheck. The matcher matches the Decode node into a register. - // Implicit_null_check optimization moves the Decode along with the - // memory operation back up before the NullCheck. - static bool narrow_oop_use_complex_address() { - return Universe::narrow_oop_shift() == 0; - } - - static bool narrow_klass_use_complex_address() { - return false; - } - - static bool const_oop_prefer_decode() { - // Prefer ConN+DecodeN over ConP in simple compressed oops mode. - return Universe::narrow_oop_base() == NULL; - } - - static bool const_klass_prefer_decode() { - // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. 
- return Universe::narrow_klass_base() == NULL; - } - - // Is it better to copy float constants, or load them directly from - // memory? Intel can load a float constant from a direct address, - // requiring no extra registers. Most RISCs will have to materialize - // an address into a register first, so they would do better to copy - // the constant from stack. - static const bool rematerialize_float_constants = false; - - // If CPU can load and store mis-aligned doubles directly then no - // fixup is needed. Else we split the double into 2 integer pieces - // and move it piece-by-piece. Only happens when passing doubles into - // C code as the Java calling convention forces doubles to be aligned. - static const bool misaligned_doubles_ok = true; - - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - - // Are floats converted to double when stored to stack during - // deoptimization? - static constexpr bool float_in_double() { return false; } - - // Do ints take an entire long register or just half? - // The relevant question is how the int is callee-saved: - // the whole long is written but de-opt'ing will have to extract - // the relevant 32 bits. - static const bool int_in_long = true; - - // Does the CPU supports vector variable shift instructions? - static constexpr bool supports_vector_variable_shifts(void) { - return false; - } - - // Does the CPU supports vector variable rotate instructions? - static constexpr bool supports_vector_variable_rotates(void) { - return false; - } - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void) { - return false; - } - - // true means we have fast l2f convers - // false means that conversion is done by runtime call - static constexpr bool convL2FSupported(void) { - return true; - } - -#endif // CPU_RISCV_MATCHER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp index 26eedd09b1d..fd907f77afb 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,7 @@ #include "prims/methodHandles.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" #define __ _masm-> @@ -91,7 +92,7 @@ void MethodHandles::verify_klass(MacroAssembler* _masm, BLOCK_COMMENT("} verify_klass"); } -void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} #endif //ASSERT @@ -154,7 +155,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, sizeof(u2), /*is_signed*/ false); Label L; __ ld(t0, __ argument_address(temp2, -1)); - __ oop_beq(recv, t0, L); + __ beq(recv, t0, L); __ ld(x10, __ argument_address(temp2, -1)); __ ebreak(); __ BIND(L); @@ -181,8 +182,9 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) // xmethod: Method* - // x13: argument locator (parameter slot count, added to rsp) + // x13: argument locator (parameter slot count, added to sp) // x11: used as temp to hold mh or receiver + // x10, x29: garbage temps, blown away Register argp = x13; // argument list ptr, live on error paths Register mh = x11; // MH receiver; dies quickly and is recycled @@ -232,7 +234,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* trace_method_handle_interpreter_entry(_masm, iid); if (iid == vmIntrinsics::_invokeBasic) { generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); - } else { // Adjust argument list by popping the trailing MemberName argument. Register recv = noreg; @@ -430,7 +431,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, #ifndef PRODUCT void trace_method_handle_stub(const char* adaptername, - oop mh, + oopDesc* mh, intptr_t* saved_regs, intptr_t* entry_sp) { } diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp index 48e9554635d..65493eba764 100644 --- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp index 8520b41aedf..6bd0cb997dd 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -235,24 +235,20 @@ void NativeMovConstReg::set_data(intptr_t x) { // Find and replace the oop/metadata corresponding to this // instruction in oops section. 
CodeBlob* cb = CodeCache::find_blob(instruction_address()); - if(cb != NULL) { - nmethod* nm = cb->as_nmethod_or_null(); - if (nm != NULL) { - RelocIterator iter(nm, instruction_address(), next_instruction_address()); - while (iter.next()) { - if (iter.type() == relocInfo::oop_type) { - oop* oop_addr = iter.oop_reloc()->oop_addr(); - *oop_addr = cast_to_oop(x); - break; - } else if (iter.type() == relocInfo::metadata_type) { - Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); - *metadata_addr = (Metadata*)x; - break; - } + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(x); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)x; + break; } } - } else { - ShouldNotReachHere(); } } @@ -326,10 +322,9 @@ bool NativeInstruction::is_safepoint_poll() { bool NativeInstruction::is_lwu_to_zr(address instr) { assert_cond(instr != NULL); - unsigned insn = *(unsigned*)instr; - return (Assembler::extract(insn, 6, 0) == 0b0000011 && - Assembler::extract(insn, 14, 12) == 0b110 && - Assembler::extract(insn, 11, 7) == 0b00000); // zr + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr } // A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. @@ -358,7 +353,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), - "riscv64 cannot replace non-jump with jump"); + "riscv cannot replace non-jump with jump"); // Patch this nmethod atomically. if (Assembler::reachable_from_branch_at(verified_entry, dest)) { @@ -384,8 +379,6 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add } void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { - NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; - CodeBuffer cb(code_pos, instruction_size); MacroAssembler a(&cb); @@ -434,4 +427,3 @@ void NativeMembar::set_kind(uint32_t order_kind) { address membar = addr_at(0); *(unsigned int*) membar = insn; } - diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index eab5467aab2..781df54e0b8 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,7 @@ class NativeInstruction { public: enum { instruction_size = 4, + compressed_instruction_size = 2, }; juint encoding() const { @@ -65,35 +66,25 @@ class NativeInstruction { bool is_call() const { return is_call_at(addr_at(0)); } bool is_jump() const { return is_jump_at(addr_at(0)); } - static bool is_jal_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1101111; } - static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100111 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_branch_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100011; } - static bool is_ld_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } - static bool is_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011; } + static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } + static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } + static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } + static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } + static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } static bool is_store_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0100011; } static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } - static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010111; } - static bool is_jump_at(address instr) { assert_cond(instr != NULL); return (is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr)); } - static bool is_addi_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0011011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } - static bool is_lui_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0110111; } + static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } + static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } + static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } + static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } + static bool is_lui_at(address instr) { assert_cond(instr != NULL); return 
extract_opcode(instr) == 0b0110111; } static bool is_slli_shift_at(address instr, uint32_t shift) { assert_cond(instr != NULL); - return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && // opcode field - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b001 && // funct3 field, select the type of operation + return (extract_opcode(instr) == 0b0010011 && // opcode field + extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field } - // return true if the (index1~index2) field of instr1 is equal to (index3~index4) field of instr2, otherwise false - static bool compare_instr_field(address instr1, int index1, int index2, address instr2, int index3, int index4) { - assert_cond(instr1 != NULL && instr2 != NULL); - return Assembler::extract(((unsigned*)instr1)[0], index1, index2) == Assembler::extract(((unsigned*)instr2)[0], index3, index4); - } - static Register extract_rs1(address instr); static Register extract_rs2(address instr); static Register extract_rd(address instr); @@ -109,15 +100,21 @@ class NativeInstruction { // slli // addi/jalr/load static bool check_movptr_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7); // check the rs1 field of addi/jalr/load and the rd field of slli + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 = addi2 + instruction_size; + address last_instr = slli2 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(last_instr) == extract_rd(slli2); } // the instruction sequence of li64 is as below: @@ -130,43 +127,60 @@ class NativeInstruction { // slli // addi static bool check_li64_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the 
rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field fof slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 20, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 24, 19, 15, instr + 20, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 24, 19, 15, instr + 24, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 28, 19, 15, instr + 24, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 28, 19, 15, instr + 28, 11, 7); // check the rs1 field and the rd field of addi + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instruction_size; + address slli2 = addi2 + instruction_size; + address addi3 = slli2 + instruction_size; + address slli3 = addi3 + instruction_size; + address addi4 = slli3 + instruction_size; + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(slli2) && + extract_rs1(addi3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(addi3) && + extract_rs1(slli3) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(slli3) && + extract_rs1(addi4) == extract_rd(addi4); } // the instruction sequence of li32 is as below: // lui // addiw static bool check_li32_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addiw and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field and the rd field of addiw + address lui = instr; + address addiw = lui + instruction_size; + + return extract_rs1(addiw) == extract_rd(lui) && + extract_rs1(addiw) == extract_rd(addiw); } // the instruction sequence of pc-relative is as below: // auipc // jalr/addi/load/float_load static bool check_pc_relative_data_dependency(address instr) { - return compare_instr_field(instr, 11, 7, instr + 4, 19, 15); // check the rd field of auipc and the rs1 field of jalr/addi/load/float_load + address auipc = instr; + address last_instr = auipc + instruction_size; + + return extract_rs1(last_instr) == extract_rd(auipc); } // the instruction sequence of load_label is as below: // auipc // load static bool check_load_pc_relative_data_dependency(address instr) { - return compare_instr_field(instr, 11, 7, instr + 4, 11, 7) && // check the rd field of auipc and the rd field of load - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field of load and the rd field of load + address auipc = instr; + address load = auipc + instruction_size; + + return extract_rd(load) 
== extract_rd(auipc) && + extract_rs1(load) == extract_rd(load); } static bool is_movptr_at(address instr); @@ -184,10 +198,7 @@ class NativeInstruction { static bool is_lwu_to_zr(address instr); inline bool is_nop(); - inline bool is_illegal(); - inline bool is_return(); inline bool is_jump_or_nop(); - inline bool is_cond_jump(); bool is_safepoint_poll(); bool is_sigill_zombie_not_entrant(); bool is_stop(); @@ -197,7 +208,6 @@ class NativeInstruction { jint int_at(int offset) const { return *(jint*) addr_at(offset); } juint uint_at(int offset) const { return *(juint*) addr_at(offset); } - jushort uint16_at(int offset) const { return *(jushort *) addr_at(offset); } address ptr_at(int offset) const { return *(address*) addr_at(offset); } @@ -218,8 +228,7 @@ class NativeInstruction { } bool is_membar() { - unsigned int insn = uint_at(0); - return (insn & 0x7f) == 0b1111 && Assembler::extract(insn, 14, 12) == 0; + return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; } }; @@ -227,7 +236,7 @@ inline NativeInstruction* nativeInstruction_at(address addr) { return (NativeInstruction*)addr; } -// The natural type of an RISCV64 instruction is uint32_t +// The natural type of an RISCV instruction is uint32_t inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { return (NativeInstruction*)addr; } @@ -239,7 +248,7 @@ inline NativeCall* nativeCall_at(address addr); class NativeCall: public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { instruction_size = 4, instruction_offset = 0, displacement_offset = 0, @@ -251,27 +260,24 @@ class NativeCall: public NativeInstruction { address return_address() const { return addr_at(return_address_offset); } address destination() const; - void set_destination(address dest) { - if (is_jal()) { - intptr_t offset = (intptr_t)(dest - instruction_address()); - assert((offset & 0x1) == 0, "should be aligned"); - assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); - unsigned int insn = 0b1101111; // jal - address pInsn = (address)(&insn); - Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); - Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); - Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); - Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); - Assembler::patch(pInsn, 11, 7, lr->encoding()); // Rd must be x1, need lr - set_int_at(displacement_offset, insn); - return; - } - ShouldNotReachHere(); + void set_destination(address dest) { + assert(is_jal(), "Should be jal instruction!"); + intptr_t offset = (intptr_t)(dest - instruction_address()); + assert((offset & 0x1) == 0, "bad alignment"); + assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); + unsigned int insn = 0b1101111; // jal + address pInsn = (address)(&insn); + Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); + Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); + Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); + Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); + Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra + set_int_at(displacement_offset, insn); } - void verify_alignment() { ; } - void verify(); - void print(); + void verify_alignment() {} // do nothing on riscv + void verify(); + void print(); // Creation inline friend NativeCall* nativeCall_at(address addr); @@ -326,8 +332,8 @@ inline NativeCall* nativeCall_before(address return_address) { // (used to manipulate inlined 64-bit data 
calls, etc.) class NativeMovConstReg: public NativeInstruction { public: - enum RISCV64_specific_constants { - movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + enum RISCV_specific_constants { + movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld instruction_offset = 0, @@ -392,7 +398,7 @@ inline NativeMovConstReg* nativeMovConstReg_before(address addr) { return test; } -// RISCV64 should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. +// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. class NativeMovRegMem: public NativeInstruction { public: int instruction_start() const { @@ -430,7 +436,7 @@ inline NativeMovRegMem* nativeMovRegMem_at (address addr) { class NativeJump: public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { instruction_size = NativeInstruction::instruction_size, instruction_offset = 0, data_offset = 0, @@ -447,9 +453,6 @@ class NativeJump: public NativeInstruction { void verify(); - // Unit testing stuff - static void test() {} - // Insertion of native jump instruction static void insert(address code_pos, address entry); // MT-safe insertion of native jump at verified method entry @@ -467,11 +470,11 @@ inline NativeJump* nativeJump_at(address addr) { class NativeGeneralJump: public NativeJump { public: - enum RISCV64_specific_constants { - instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + enum RISCV_specific_constants { + instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr instruction_offset = 0, data_offset = 0, - normal_next_instruction_offset = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr }; address jump_destination() const; @@ -493,9 +496,8 @@ class NativeIllegalInstruction: public NativeInstruction { static void insert(address code_pos); }; -inline bool NativeInstruction::is_nop() { - address instr_addr = addr_at(0); - uint32_t insn = *(uint32_t*)instr_addr; +inline bool NativeInstruction::is_nop() { + uint32_t insn = *(uint32_t*)addr_at(0); return insn == 0x13; } @@ -507,7 +509,7 @@ inline bool NativeInstruction::is_jump_or_nop() { class NativeCallTrampolineStub : public NativeInstruction { public: - enum RISCV64_specific_constants { + enum RISCV_specific_constants { // Refer to function emit_trampoline_stub. instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr @@ -529,11 +531,14 @@ inline bool is_NativeCallTrampolineStub_at(address addr) { // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 // 3). 
check if the offset in ld[31:20] equals the data_offset assert_cond(addr != NULL); - if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + 4) && NativeInstruction::is_jalr_at(addr + 8) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[0], 11, 7) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[1], 11, 7) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[1], 19, 15) == x5) && - ((Register)(intptr_t)Assembler::extract(((unsigned*)addr)[2], 19, 15) == x5) && + const int instr_size = NativeInstruction::instruction_size; + if (NativeInstruction::is_auipc_at(addr) && + NativeInstruction::is_ld_at(addr + instr_size) && + NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { return true; } diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp index f2b87c01fa3..fef8ca9b64e 100644 --- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp +++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -34,7 +33,6 @@ // This is the hook for finding a register in an "well-known" location, // such as a register block of a predetermined format. address pd_location(VMReg reg) const { return NULL; } - address pd_location(VMReg base_reg, int slot_idx) const; // no PD state to clear or copy: void pd_clear() {} diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp index 61f7f77e793..583f67573ca 100644 --- a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,6 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" -#include "asm/macroAssembler.inline.hpp" #include "asm/register.hpp" #include "interp_masm_riscv.hpp" #include "register_riscv.hpp" @@ -177,7 +176,6 @@ REGISTER_DEFINITION(Register, gp); REGISTER_DEFINITION(Register, tp); REGISTER_DEFINITION(Register, xmethod); REGISTER_DEFINITION(Register, ra); -REGISTER_DEFINITION(Register, lr); REGISTER_DEFINITION(Register, sp); REGISTER_DEFINITION(Register, fp); REGISTER_DEFINITION(Register, xheapbase); diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp index 2e9c88c223a..ef60cb3bb05 100644 --- a/src/hotspot/cpu/riscv/register_riscv.cpp +++ b/src/hotspot/cpu/riscv/register_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,18 +28,14 @@ const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; + const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; -const int ConcreteRegisterImpl::max_vpr = - ConcreteRegisterImpl::max_fpr + - VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; - - const char* RegisterImpl::name() const { - const char* names[number_of_registers] = { - "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", + static const char *const names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" @@ -49,7 +44,7 @@ const char* RegisterImpl::name() const { } const char* FloatRegisterImpl::name() const { - const char* names[number_of_registers] = { + static const char *const names[number_of_registers] = { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", @@ -59,7 +54,7 @@ const char* FloatRegisterImpl::name() const { } const char* VectorRegisterImpl::name() const { - const char* names[number_of_registers] = { + static const char *const names[number_of_registers] = { "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp index fa8b80ef3b9..ca859569bd2 100644 --- a/src/hotspot/cpu/riscv/register_riscv.hpp +++ b/src/hotspot/cpu/riscv/register_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,31 +57,49 @@ class RegisterImpl: public AbstractRegisterImpl { public: enum { number_of_registers = 32, - number_of_byte_registers = 32, max_slots_per_register = 2, + + // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable + // for compressed instructions. See Table 17.2 in spec. + compressed_register_base = 8, + compressed_register_top = 15, }; // derived registers, offsets, and addresses - Register successor() const { return as_Register(encoding() + 1); } + const Register successor() const { return as_Register(encoding() + 1); } // construction inline friend Register as_Register(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } - bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; - int encoding_nocheck() const { return (intptr_t)this; } + + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + int compressed_encoding_nocheck() const { + return encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } // Return the bit which represents this register. This is intended // to be ORed into a bitmask: for usage see class RegSet below. - unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } + uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } }; -// The integer registers of the riscv64 architecture +// The integer registers of the RISCV architecture CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); @@ -133,25 +150,43 @@ class FloatRegisterImpl: public AbstractRegisterImpl { enum { number_of_registers = 32, max_slots_per_register = 2, + + // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
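    // Editorial sketch of how this range is meant to be used, assuming the same
    // compressed_encoding()/is_compressed_valid() helpers declared for the
    // integer registers above: an RVC register field holds only 3 bits, so a
    // register is encodable iff it lies in f8..f15, and its field value is the
    // encoding minus the base, e.g.
    //   f10->is_compressed_valid()   // true: 8 <= 10 <= 15
    //   f10->compressed_encoding()   // 10 - 8 == 2, fits the 3-bit rs1'/rs2' field
    //   f16->is_compressed_valid()   // false: not usable in a compressed form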
+ compressed_register_base = 8, + compressed_register_top = 15, }; // construction inline friend FloatRegister as_FloatRegister(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // derived registers, offsets, and addresses - FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; + // for rvc + int compressed_encoding() const { + assert(is_compressed_valid(), "invalid compressed register"); + return encoding() - compressed_register_base; + } + + int compressed_encoding_nocheck() const { + return encoding_nocheck() - compressed_register_base; + } + + bool is_compressed_valid() const { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } }; -// The float registers of the RISCV64 architecture +// The float registers of the RISCV architecture CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); @@ -196,7 +231,7 @@ inline VectorRegister as_VectorRegister(int encoding) { return (VectorRegister)(intptr_t) encoding; } -// The implementation of vector registers for riscv-v +// The implementation of vector registers for RVV class VectorRegisterImpl: public AbstractRegisterImpl { public: enum { @@ -207,15 +242,15 @@ class VectorRegisterImpl: public AbstractRegisterImpl { // construction inline friend VectorRegister as_VectorRegister(int encoding); - VMReg as_VMReg(); + VMReg as_VMReg() const; // derived registers, offsets, and addresses VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } - int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } const char* name() const; }; @@ -275,22 +310,20 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { // added to make it compile static const int max_gpr; static const int max_fpr; - static const int max_vpr; }; // A set of registers class RegSet { uint32_t _bitset; -public: RegSet(uint32_t bitset) : _bitset(bitset) { } +public: + RegSet() : _bitset(0) { } RegSet(Register r1) : _bitset(r1->bit()) { } - ~RegSet() {} - RegSet operator+(const RegSet aSet) const { RegSet result(_bitset | aSet._bitset); return result; @@ -330,13 +363,20 @@ class RegSet { static RegSet range(Register start, Register end) { uint32_t bits = ~0; bits <<= start->encoding(); - bits <<= (31 - end->encoding()); - bits >>= (31 - end->encoding()); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); return RegSet(bits); } uint32_t bits() const { return _bitset; } + +private: + + Register first() { + uint32_t first = _bitset & 
-_bitset; + return first ? as_Register(exact_log2(first)) : noreg; + } }; #endif // CPU_RISCV_REGISTER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp index 6b90922f8dc..047ea2276ca 100644 --- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,10 +38,9 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { int bytes; - switch(type()) { + switch (type()) { case relocInfo::oop_type: { oop_Relocation *reloc = (oop_Relocation *)this; - // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate if (NativeInstruction::is_load_pc_relative_at(addr())) { address constptr = (address)code()->oop_addr_at(reloc->oop_index()); bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp index c5dd75bf682..840ed935d88 100644 --- a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 52d4c23cf33..0a358b7223d 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1,7 +1,7 @@ // // Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,7 @@ // // -// RISCV64 Architecture Description File +// RISCV Architecture Description File //----------REGISTER DEFINITION BLOCK------------------------------------------ // This information is used by the matcher and the register allocator to @@ -68,8 +68,8 @@ register %{ // // follow the C1 compiler in making registers // -// x7, x9-x17, x28-x31 volatile (caller save) -// x0-x4, x8, x27 system (no save, no allocate) +// x7, x9-x17, x27-x31 volatile (caller save) +// x0-x4, x8, x23 system (no save, no allocate) // x5-x6 non-allocatable (so we can use them as temporary regs) // @@ -82,8 +82,8 @@ register %{ reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // lr -reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra +reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp @@ -154,7 +154,7 @@ reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); // CPU stores such a register pair to memory, the word associated with // the lower ADLC-assigned number must be stored to the lower address. -// RISCV64 has 32 floating-point registers. Each can store a single +// RISCV has 32 floating-point registers. Each can store a single // or double precision floating-point value. // for Java use float registers f0-f31 are always save on call whereas @@ -397,24 +397,6 @@ reg_def V31_H ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next() ); reg_def V31_J ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next(2) ); reg_def V31_K ( SOC, SOC, Op_VecX, 31, v31->as_VMReg()->next(3) ); -// Double Registers - -// The rules of ADL require that double registers be defined in pairs. -// Each pair must be two 32-bit values, but not necessarily a pair of -// single float registers. In each pair, ADLC-assigned register numbers -// must be adjacent, with the lower number even. Finally, when the -// CPU stores such a register pair to memory, the word associated with -// the lower ADLC-assigned number must be stored to the lower address. - -// RISCV64 has 32 floating-point registers. Each can store a vector of -// single or double precision floating-point values up to 8 * 32 -// floats, 4 * 64 bit floats or 2 * 128 bit floats. We currently only -// use the first float or double element of the vector. - -// for Java use float registers v0-v15 are always save on call whereas -// the platform ABI treats v8-v15 as callee save). float registers -// v16-v31 are SOC as per the platform spec - // ---------------------------- // Special Registers // ---------------------------- @@ -467,7 +449,7 @@ alloc_class chunk0( R4, R4_H, // thread R8, R8_H, // fp R0, R0_H, // zero - R1, R1_H, // lr + R1, R1_H, // ra R2, R2_H, // sp R3, R3_H, // gp ); @@ -554,8 +536,8 @@ alloc_class chunk3(RFLAGS); // Several register classes are automatically defined based upon information in // this architecture description. 
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) +// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) +// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // @@ -652,7 +634,7 @@ reg_class any_reg %{ // Class for non-allocatable 32 bit registers reg_class non_allocatable_reg32( R0, // zr - R1, // lr + R1, // ra R2, // sp R3, // gp R4, // tp @@ -662,7 +644,7 @@ reg_class non_allocatable_reg32( // Class for non-allocatable 64 bit registers reg_class non_allocatable_reg( R0, R0_H, // zr - R1, R1_H, // lr + R1, R1_H, // ra R2, R2_H, // sp R3, R3_H, // gp R4, R4_H, // tp @@ -763,7 +745,7 @@ reg_class fp_reg( ); // Class for link register -reg_class lr_reg( +reg_class ra_reg( R1, R1_H ); @@ -846,10 +828,6 @@ reg_class double_reg( F31, F31_H ); -// Class for all 64bit vector registers -reg_class vectord_reg( -); - // Class for all 128bit vector registers reg_class vectorx_reg( V1, V1_H, V1_J, V1_K, @@ -1028,19 +1006,16 @@ class HandlerImpl { } }; -// predicate controlling translation of StoreCM -bool unnecessary_storestore(const Node *storecm); - bool is_CAS(int opcode, bool maybe_volatile); // predicate controlling translation of CompareAndSwapX -bool needs_acquiring_load_exclusive(const Node *load); +bool needs_acquiring_load_reserved(const Node *load); + +// predicate controlling translation of StoreCM +bool unnecessary_storestore(const Node *storecm); // predicate controlling addressing modes bool size_fits_all_mem_uses(AddPNode* addp, int shift); - -// predicate using the temp register for decoding klass -bool maybe_use_tmp_register_decoding_klass(); %} source %{ @@ -1089,34 +1064,13 @@ void reg_mask_init() { } } -// predicate controlling translation of StoreCM -// -// returns true if a StoreStore must precede the card write otherwise -// false -bool unnecessary_storestore(const Node *storecm) -{ - assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); - - // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) - // between an object put and the associated card mark when we are using - // CMS without conditional card marking - - if (UseConcMarkSweepGC && !UseCondCardMark) { - return false; - } - - // a storestore is unnecesary in all other cases - - return true; -} - // is_CAS(int opcode, bool maybe_volatile) // // return true if opcode is one of the possible CompareAndSwapX // values otherwise false. 
bool is_CAS(int opcode, bool maybe_volatile) { - switch(opcode) { + switch (opcode) { // We handle these case Op_CompareAndSwapI: case Op_CompareAndSwapL: @@ -1147,12 +1101,6 @@ bool is_CAS(int opcode, bool maybe_volatile) case Op_WeakCompareAndSwapL: case Op_WeakCompareAndSwapP: case Op_WeakCompareAndSwapN: -#if INCLUDE_SHENANDOAHGC - case Op_ShenandoahWeakCompareAndSwapP: - case Op_ShenandoahWeakCompareAndSwapN: - case Op_ShenandoahCompareAndExchangeP: - case Op_ShenandoahCompareAndExchangeN: -#endif return maybe_volatile; default: return false; @@ -1162,12 +1110,9 @@ bool is_CAS(int opcode, bool maybe_volatile) // predicate controlling translation of CAS // // returns true if CAS needs to use an acquiring load otherwise false -bool needs_acquiring_load_exclusive(const Node *n) +bool needs_acquiring_load_reserved(const Node *n) { assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); - if (UseBarriersForVolatile) { - return false; - } LoadStoreNode* ldst = n->as_LoadStore(); if (n != NULL && is_CAS(n->Opcode(), false)) { @@ -1179,11 +1124,28 @@ bool needs_acquiring_load_exclusive(const Node *n) return true; } -bool maybe_use_tmp_register_decoding_klass() { - return !UseCompressedOops && - Universe::narrow_klass_base() != NULL && - Universe::narrow_klass_shift() != 0; +// predicate controlling translation of StoreCM +// +// returns true if a StoreStore must precede the card write otherwise +// false + +bool unnecessary_storestore(const Node *storecm) +{ + assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + + // we need to generate a dmb ishst between an object put and the + // associated card mark when we are using CMS without conditional + // card marking + + if (UseConcMarkSweepGC && !UseCondCardMark) { + return false; + } + + // a storestore is unnecesary in all other cases + + return true; } + #define __ _masm. // advance declarations for helper functions to convert register @@ -1216,25 +1178,49 @@ int MachCallRuntimeNode::ret_addr_offset() { // jal(addr) // or with far branches // jal(trampoline_stub) - // for real runtime callouts it will be 12 instructions - // see riscv64_enc_java_to_runtime + // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime // la(t1, retaddr) -> auipc + addi // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi // addi(sp, sp, -2 * wordSize) -> addi - // sd(zr, Address(sp)) -> sd - // sd(t1, Address(sp, wordSize)) -> sd -> sdd in CSky + // sd(t1, Address(sp, wordSize)) -> sd // jalr(t0) -> jalr CodeBlob *cb = CodeCache::find_blob(_entry_point); if (cb != NULL) { return 1 * NativeInstruction::instruction_size; } else { - if (UseCSky) - return 11 * NativeInstruction::instruction_size; - else - return 12 * NativeInstruction::instruction_size; + return 11 * NativeInstruction::instruction_size; } } +// +// Compute padding required for nodes which need alignment +// + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. +int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ + // to make sure the address of jal 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + +// With RVC a call instruction may get 2-byte aligned. +// The address of the call instruction needs to be 4-byte aligned to +// ensure that it does not span a cache line so that it can be patched. 
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const +{ + // skip the movptr in MacroAssembler::ic_call(): + // lui + addi + slli + addi + slli + addi + // Though movptr() has already 4-byte aligned with or without RVC, + // We need to prevent from further changes by explicitly calculating the size. + const int movptr_size = 6 * NativeInstruction::instruction_size; + current_offset += movptr_size; + // to make sure the address of jal 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + // Indicate if the safepoint node needs the polling page as an input // the shared code plants the oop data at the start of the generated @@ -1260,6 +1246,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); __ ebreak(); } @@ -1277,13 +1264,14 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. for (int i = 0; i < _count; i++) { __ nop(); } } uint MachNopNode::size(PhaseRegAlloc*) const { - return _count * NativeInstruction::instruction_size; + return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); } //============================================================================= @@ -1324,9 +1312,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("# stack bang size=%d\n\t", framesize); } - st->print("sd fp, [sp, #%d]", - 2 * wordSize); - st->print("sd lr, [sp, #%d]", - wordSize); - if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } + st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } st->print("sub sp, sp, #%d\n\t", framesize); } #endif @@ -1337,17 +1325,16 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); // n.b. 
frame size includes space for return pc and fp - const long framesize = C->frame_size_in_bytes(); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + const int framesize = C->frame_size_in_bytes(); // insert a nop at the start of the prolog so we can patch in a // branch if we need to invalidate the method later - __ nop(); // 4 bytes + __ nop(); assert_cond(C != NULL); int bangsize = C->bang_size_in_bytes(); - if (C->need_stack_bang(bangsize) && UseStackBanging) { + if (C->need_stack_bang(bangsize)) { __ generate_stack_overflow_check(bangsize); } @@ -1391,19 +1378,19 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("# pop frame %d\n\t", framesize); if (framesize == 0) { - st->print("ld lr, [sp,#%d]\n\t", (2 * wordSize)); + st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); st->print("add sp, sp, #%d\n\t", (2 * wordSize)); } else { st->print("add sp, sp, #%d\n\t", framesize); - st->print("ld lr, [sp,#%d]\n\t", - 2 * wordSize); + st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); st->print("ld fp, [sp,#%d]\n\t", - wordSize); } if (do_polling() && C->is_method_compilation()) { st->print("# touch polling page\n\t"); st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); - st->print("ld zr, [t0]"); + st->print("ld zr, [t0]"); } } #endif @@ -1440,6 +1427,9 @@ const Pipeline * MachEpilogNode::pipeline() const { return MachNode::pipeline_class(); } +// This method seems to be obsolete. It is declared in machnode.hpp +// and defined in all *.ad files, but it is never called. Should we +// get rid of it? int MachEpilogNode::safepoint_offset() const { assert(do_polling(), "no return for this epilog node"); return 4; @@ -1534,13 +1524,15 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } } else if (cbuf != NULL) { MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); switch (src_lo_rc) { case rc_int: if (dst_lo_rc == rc_int) { // gpr --> gpr copy - __ mv(as_Register(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); - if (!is64 && this->ideal_reg() != Op_RegI) // zero extended for narrow oop or klass - __ clear_upper_bits(as_Register(Matcher::_regEncode[dst_lo]), 32); + if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass + __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); + } else { + __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); + } } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy if (is64) { __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), @@ -1614,11 +1606,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo } else { st->print("%s", Matcher::regName[dst_lo]); } - if (bottom_type() == NULL || bottom_type()->isa_vect() != NULL) { - ShouldNotReachHere(); - } else { - st->print("\t# spill size = %d", is64 ? 64 : 32); - } + st->print("\t# spill size = %d", is64 ? 
64 : 32); } return 0; @@ -1690,14 +1678,16 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const assert_cond(st != NULL); st->print_cr("# MachUEPNode"); if (UseCompressedClassPointers) { - st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); if (Universe::narrow_klass_shift() != 0) { st->print_cr("\tdecode_klass_not_null t0, t0"); } } else { - st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); } - st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); + st->print_cr("\tbeq t0, t1, ic_hit"); + st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); + st->print_cr("\tic_hit:"); } #endif @@ -1775,16 +1765,22 @@ const bool Matcher::match_rule_supported(int opcode) { } switch (opcode) { + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + return UseRVB; + case Op_StrCompressedCopy: // fall through case Op_StrInflatedCopy: // fall through return UseRVV; case Op_EncodeISOArray: return UseRVV && SpecialEncodeISOArray; - - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; } return true; // Per default match rules are supported. @@ -1850,12 +1846,7 @@ const bool Matcher::convL2FSupported(void) { // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - int size = MIN2(16, (int)MaxVectorSize); - // Minimum 2 values in vector - if (size < 2 * type2aelembytes(bt)) { size = 0; } - // But never < 4 - if (size < 4) { size = 0; } - return size; + return 0; } // Limits on vector size (number of elements) loaded into vector. @@ -1863,10 +1854,7 @@ const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt) / type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - // For the moment limit the vector size to 8 bytes - int size = 8 / type2aelembytes(bt); - if (size < 2) { size = 2; } - return size; + return max_vector_size(bt); } // Vector ideal reg. @@ -1885,8 +1873,8 @@ const bool Matcher::pass_original_key_for_aes() { return false; } +// RISC-V supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { - ShouldNotReachHere(); return true; } @@ -1940,7 +1928,7 @@ bool Matcher::is_generic_vector(MachOper* opnd) { // Implicit_null_check optimization moves the Decode along with the // memory operation back up before the NullCheck. 
bool Matcher::narrow_oop_use_complex_address() { - return false; + return Universe::narrow_oop_shift() == 0; } bool Matcher::narrow_klass_use_complex_address() { @@ -2113,14 +2101,15 @@ void Compile::reshape_address(AddPNode* addp) { encode %{ // BEGIN Non-volatile memory access - enc_class riscv64_enc_li_imm(iRegIorL dst, immIorL src) %{ + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); int64_t con = (int64_t)$src$$constant; Register dst_reg = as_Register($dst$$reg); __ li(dst_reg, con); %} - enc_class riscv64_enc_mov_p(iRegP dst, immP src) %{ + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2139,13 +2128,14 @@ encode %{ } %} - enc_class riscv64_enc_mov_p1(iRegP dst) %{ + enc_class riscv_enc_mov_p1(iRegP dst) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register dst_reg = as_Register($dst$$reg); __ li(dst_reg, 1); %} - enc_class riscv64_enc_mov_poll_page(iRegP dst, immPollPage src) %{ + enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ MacroAssembler _masm(&cbuf); int32_t offset = 0; address page = (address)$src$$constant; @@ -2156,12 +2146,12 @@ encode %{ __ addi(dst_reg, dst_reg, offset); %} - enc_class riscv64_enc_mov_byte_map_base(iRegP dst) %{ + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ MacroAssembler _masm(&cbuf); __ load_byte_map_base($dst$$Register); %} - enc_class riscv64_enc_mov_n(iRegN dst, immN src) %{ + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2174,13 +2164,13 @@ encode %{ } %} - enc_class riscv64_enc_mov_zero(iRegNorP dst) %{ + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); __ mv(dst_reg, zr); %} - enc_class riscv64_enc_mov_nk(iRegN dst, immNKlass src) %{ + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); address con = (address)$src$$constant; @@ -2193,42 +2183,42 @@ encode %{ } %} - enc_class riscv64_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class 
riscv64_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, /*result as bool*/ true); %} - enc_class riscv64_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, @@ -2237,16 +2227,16 @@ encode %{ // compare and branch instruction encodings - enc_class riscv64_enc_j(label lbl) %{ + enc_class riscv_enc_j(label lbl) %{ MacroAssembler _masm(&cbuf); Label* L = $lbl$$label; __ j(*L); %} - enc_class riscv64_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ MacroAssembler _masm(&cbuf); Label* L = $lbl$$label; - switch($cmp$$cmpcode) { + switch ($cmp$$cmpcode) { case(BoolTest::ge): __ j(*L); break; @@ -2259,7 +2249,7 @@ encode %{ // call instruction encodings - enc_class riscv64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ + enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ Register sub_reg = as_Register($sub$$reg); Register super_reg = as_Register($super$$reg); Register temp_reg = as_Register($temp$$reg); @@ -2286,7 +2276,7 @@ encode %{ __ bind(done); %} - enc_class riscv64_enc_java_static_call(method meth) %{ + enc_class riscv_enc_java_static_call(method meth) %{ MacroAssembler _masm(&cbuf); address addr = (address)$meth$$method; @@ -2318,7 +2308,7 @@ encode %{ } %} - enc_class riscv64_enc_java_dynamic_call(method meth) %{ + enc_class riscv_enc_java_dynamic_call(method meth) %{ MacroAssembler _masm(&cbuf); int method_index = resolved_method_index(cbuf); address call = __ ic_call((address)$meth$$method, method_index); @@ -2328,7 +2318,7 @@ encode %{ } %} - enc_class riscv64_enc_call_epilog() %{ + enc_class riscv_enc_call_epilog() %{ MacroAssembler _masm(&cbuf); if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack @@ -2336,7 +2326,7 @@ encode %{ } %} - enc_class riscv64_enc_java_to_runtime(method meth) %{ + enc_class riscv_enc_java_to_runtime(method meth) %{ MacroAssembler _masm(&cbuf); // some calls to generated routines (arraycopy code) are scheduled @@ -2357,7 +2347,6 @@ encode %{ __ la(t0, RuntimeAddress(entry)); // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() __ addi(sp, sp, -2 * wordSize); - __ sd(zr, Address(sp)); __ sd(t1, Address(sp, wordSize)); __ jalr(t0); __ bind(retaddr); @@ -2366,29 +2355,28 @@ encode %{ %} // using the cr register as the bool result: 0 for success; others failed. 
- enc_class riscv64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ MacroAssembler _masm(&cbuf); Register flag = t1; Register oop = as_Register($object$$reg); Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); + Register disp_hdr = as_Register($tmp1$$reg); Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; assert_different_registers(oop, box, tmp, disp_hdr, t0); - // Load markOop from object into displaced_header. + // Load markWord from object into displaced_header. __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); // Always do locking in runtime. if (EmitSync & 0x01) { - __ li(flag, 1); + __ mv(flag, 1); return; } if (UseBiasedLocking && !UseOptoBiasInlining) { - // ignore slow case here __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); } @@ -2398,15 +2386,15 @@ encode %{ __ bnez(t0, object_has_monitor); } - // Set tmp to be (markOop of object | UNLOCK_VALUE). + // Set tmp to be (markWord of object | UNLOCK_VALUE). __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); // Initialize the box. (Must happen before we update the object mark!) __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - // Compare object markOop with an unlocked value (tmp) and if - // equal exchange the stack address of our box with object markOop. - // On failure disp_hdr contains the possibly locked markOop. + // Compare object markWord with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markWord. + // On failure disp_hdr contains the possibly locked markWord. __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, Assembler::rl, /*result*/disp_hdr); __ mv(flag, zr); @@ -2419,7 +2407,7 @@ encode %{ // We did not see an unlocked object so try the fast recursive case. // Check if the owner is self by comparing the value in the - // markOop of object (disp_hdr) with the stack pointer. + // markWord of object (disp_hdr) with the stack pointer. __ sub(disp_hdr, disp_hdr, sp); __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, @@ -2440,7 +2428,7 @@ encode %{ // Try to CAS m->owner from NULL to current thread. __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) // Store a non-null value into the box to avoid looking like a re-entrant // lock. The fast-path monitor unlock code checks for @@ -2454,12 +2442,12 @@ encode %{ %} // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
- enc_class riscv64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ + enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ MacroAssembler _masm(&cbuf); Register flag = t1; Register oop = as_Register($object$$reg); Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); + Register disp_hdr = as_Register($tmp1$$reg); Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; @@ -2468,7 +2456,7 @@ encode %{ // Always do locking in runtime. if (EmitSync & 0x01) { - __ li(flag, 1); + __ mv(flag, 1); return; } @@ -2491,7 +2479,7 @@ encode %{ } // Check if it is still a light weight lock, this is true if we - // see the stack address of the basicLock in the markOop of the + // see the stack address of the basicLock in the markWord of the // object. __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, @@ -2504,7 +2492,8 @@ encode %{ // Handle existing monitor. if ((EmitSync & 0x02) == 0) { __ bind(object_has_monitor); - __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor + STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. @@ -2526,7 +2515,7 @@ encode %{ // arithmetic encodings - enc_class riscv64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2534,7 +2523,7 @@ encode %{ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); %} - enc_class riscv64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2542,7 +2531,7 @@ encode %{ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); %} - enc_class riscv64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2550,7 +2539,7 @@ encode %{ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); %} - enc_class riscv64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ MacroAssembler _masm(&cbuf); Register dst_reg = as_Register($dst$$reg); Register src1_reg = as_Register($src1$$reg); @@ -2558,29 +2547,32 @@ encode %{ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); %} - enc_class riscv64_enc_tail_call(iRegP jump_target) %{ + enc_class riscv_enc_tail_call(iRegP jump_target) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register target_reg = as_Register($jump_target$$reg); __ jr(target_reg); %} - enc_class riscv64_enc_tail_jmp(iRegP jump_target) %{ + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); Register target_reg = as_Register($jump_target$$reg); // exception oop should be in x10 - // ret addr has been popped into lr + // ret addr has been popped into ra // callee expects it in x13 - __ mv(x13, lr); + __ mv(x13, ra); 
__ jr(target_reg); %} - enc_class riscv64_enc_rethrow() %{ + enc_class riscv_enc_rethrow() %{ MacroAssembler _masm(&cbuf); __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); %} - enc_class riscv64_enc_ret() %{ + enc_class riscv_enc_ret() %{ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); __ ret(); %} @@ -2637,7 +2629,7 @@ encode %{ // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is // even aligned with pad0 as needed. // Region 6 is even aligned. Region 6-7 is NOT even aligned; -// (the latter is true on Intel but is it false on RISCV64?) +// (the latter is true on Intel but is it false on RISCV?) // region 6-11 is even aligned; it may be padded out more so that // the region from SP to FP meets the minimum stack alignment. // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack @@ -2678,8 +2670,8 @@ frame %{ // Number of stack slots between incoming argument block and the start of // a new frame. The PROLOG must add this many slots to the stack. The - // EPILOG must remove this many slots. - // Riscv64 needs two words for LR (return address) and FP (frame pointer). + // EPILOG must remove this many slots. RISC-V needs two slots for + // return address and fp. in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); // Number of outgoing stack slots killed above the out_preserve_stack_slots @@ -2839,6 +2831,24 @@ operand immI_le_4() interface(CONST_INTER); %} +operand immI_16() +%{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() +%{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_31() %{ predicate(n->get_int() == 31); @@ -2981,6 +2991,16 @@ operand immByteMapBase() interface(CONST_INTER); %} +// Int Immediate: low 16-bit mask +operand immI_16bits() +%{ + predicate(n->get_int() == 0xFFFF); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Long Immediate: low 32-bit mask operand immL_32bits() %{ @@ -3126,6 +3146,17 @@ operand immLOffset() interface(CONST_INTER); %} +// Scale values +operand immIScale() +%{ + predicate(1 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + // Integer 32 bit Register Operands operand iRegI() %{ @@ -3253,7 +3284,6 @@ operand iRegP() match(RegP); match(iRegPNoSp); match(iRegP_R10); - match(iRegP_R15); match(javaThread_RegP); op_cost(0); format %{ %} @@ -3586,7 +3616,7 @@ operand indOffLN(iRegN reg, immLOffset off) %} %} -// Riscv64 opto stubs need to write to the pc slot in the thread anchor +// RISCV opto stubs need to write to the pc slot in the thread anchor operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) %{ constraint(ALLOC_IN_RC(ptr_reg)); @@ -4568,6 +4598,7 @@ instruct loadI(iRegINoSp dst, memory mem) format %{ "lw $dst, $mem\t# int, #@loadI" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4583,6 +4614,7 @@ instruct loadI2L(iRegLNoSp dst, memory mem) format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4613,6 +4645,7 @@ instruct loadL(iRegLNoSp dst, memory mem) format %{ "ld $dst, $mem\t# int, #@loadL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), 
Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4643,6 +4676,7 @@ instruct loadP(iRegPNoSp dst, memory mem) format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4673,6 +4707,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem) format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4718,6 +4753,7 @@ instruct loadD(fRegD dst, memory mem) format %{ "fld $dst, $mem\t# double, #@loadD" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -4732,7 +4768,7 @@ instruct loadConI(iRegINoSp dst, immI src) ins_cost(ALU_COST); format %{ "li $dst, $src\t# int, #@loadConI" %} - ins_encode(riscv64_enc_li_imm(dst, src)); + ins_encode(riscv_enc_li_imm(dst, src)); ins_pipe(ialu_imm); %} @@ -4745,7 +4781,7 @@ instruct loadConL(iRegLNoSp dst, immL src) ins_cost(ALU_COST); format %{ "li $dst, $src\t# long, #@loadConL" %} - ins_encode(riscv64_enc_li_imm(dst, src)); + ins_encode(riscv_enc_li_imm(dst, src)); ins_pipe(ialu_imm); %} @@ -4758,7 +4794,7 @@ instruct loadConP(iRegPNoSp dst, immP con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# ptr, #@loadConP" %} - ins_encode(riscv64_enc_mov_p(dst, con)); + ins_encode(riscv_enc_mov_p(dst, con)); ins_pipe(ialu_imm); %} @@ -4771,7 +4807,7 @@ instruct loadConP0(iRegPNoSp dst, immP0 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} - ins_encode(riscv64_enc_mov_zero(dst)); + ins_encode(riscv_enc_mov_zero(dst)); ins_pipe(ialu_imm); %} @@ -4784,7 +4820,7 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} - ins_encode(riscv64_enc_mov_p1(dst)); + ins_encode(riscv_enc_mov_p1(dst)); ins_pipe(ialu_imm); %} @@ -4797,7 +4833,7 @@ instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ins_cost(ALU_COST * 6); format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} - ins_encode(riscv64_enc_mov_poll_page(dst, con)); + ins_encode(riscv_enc_mov_poll_page(dst, con)); ins_pipe(ialu_imm); %} @@ -4809,7 +4845,7 @@ instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} - ins_encode(riscv64_enc_mov_byte_map_base(dst)); + ins_encode(riscv_enc_mov_byte_map_base(dst)); ins_pipe(ialu_imm); %} @@ -4822,7 +4858,7 @@ instruct loadConN(iRegNNoSp dst, immN con) ins_cost(ALU_COST * 4); format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} - ins_encode(riscv64_enc_mov_n(dst, con)); + ins_encode(riscv_enc_mov_n(dst, con)); ins_pipe(ialu_imm); %} @@ -4835,7 +4871,7 @@ instruct loadConN0(iRegNNoSp dst, immN0 con) ins_cost(ALU_COST); format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} - ins_encode(riscv64_enc_mov_zero(dst)); + ins_encode(riscv_enc_mov_zero(dst)); ins_pipe(ialu_imm); %} @@ -4848,7 +4884,7 @@ instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ins_cost(ALU_COST * 6); format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} - ins_encode(riscv64_enc_mov_nk(dst, con)); + ins_encode(riscv_enc_mov_nk(dst, con)); ins_pipe(ialu_imm); %} @@ -5016,6 +5052,7 @@ instruct storeI(iRegIorL2I src, memory mem) format %{ "sw $src, $mem\t# int, #@storeI" %} ins_encode %{ + 
Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5045,6 +5082,7 @@ instruct storeL(iRegL src, memory mem) format %{ "sd $src, $mem\t# long, #@storeL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5075,6 +5113,7 @@ instruct storeP(iRegP src, memory mem) format %{ "sd $src, $mem\t# ptr, #@storeP" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5105,6 +5144,7 @@ instruct storeN(iRegN src, memory mem) format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5115,7 +5155,7 @@ instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); predicate(Universe::narrow_oop_base() == NULL && - Universe::narrow_klass_base() == NULL); + Universe::narrow_klass_base() == NULL); ins_cost(STORE_COST); format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} @@ -5151,6 +5191,7 @@ instruct storeD(fRegD src, memory mem) format %{ "fsd $src, $mem\t# double, #@storeD" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5166,6 +5207,7 @@ instruct storeNKlass(iRegN src, memory mem) format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); %} @@ -5189,7 +5231,7 @@ instruct storeNKlass(iRegN src, memory mem) // only for 64-bit. // // We implement LoadPLocked and storePConditional instructions using, -// respectively the RISCV64 hw load-reserve and store-conditional +// respectively the RISCV hw load-reserve and store-conditional // instructions. Whereas we must implement each of // Store{IL}Conditional using a CAS which employs a pair of // instructions comprising a load-reserve followed by a @@ -5237,7 +5279,9 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla %} // storeLConditional is used by PhaseMacroExpand::expand_lock_node -// when attempting to rebias a lock towards the current thread. +// when attempting to rebias a lock towards the current thread. We +// must use the acquire form of cmpxchg in order to guarantee acquire +// semantics in this case. 
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ match(Set cr (StoreLConditional mem (Binary oldval newval))); @@ -5282,13 +5326,14 @@ instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFl // standard CompareAndSwapX when we are using barriers // these have higher priority than the rules selected by a predicate -instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); - effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5304,13 +5349,14 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 ins_pipe(pipe_slow); %} -instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); - effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" @@ -5337,7 +5383,7 @@ instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" %} - ins_encode(riscv64_enc_cmpxchgw(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5353,7 +5399,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" %} - ins_encode(riscv64_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5369,7 +5415,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" %} - ins_encode(riscv64_enc_cmpxchg(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5385,7 +5431,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoS "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" %} - ins_encode(riscv64_enc_cmpxchgn(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5394,7 +5440,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoS instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapB mem (Binary oldval newval))); @@ -5419,7 +5465,7 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapS mem (Binary oldval newval))); @@ -5443,7 +5489,7 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapI mem (Binary oldval newval))); @@ -5454,14 +5500,14 @@ instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegI "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" %} - ins_encode(riscv64_enc_cmpxchgw_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapL mem (Binary oldval newval))); @@ -5472,14 +5518,14 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" %} - ins_encode(riscv64_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapP mem (Binary oldval newval))); @@ -5490,14 +5536,14 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" %} - ins_encode(riscv64_enc_cmpxchg_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndSwapN mem (Binary oldval newval))); @@ -5508,7 +5554,7 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegN "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapNAcq" %} - ins_encode(riscv64_enc_cmpxchgn_acq(res, mem, oldval, newval)); + ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ins_pipe(pipe_slow); %} @@ -5646,7 +5692,7 @@ instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ne instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeB mem (Binary oldval newval))); @@ -5670,7 +5716,7 @@ instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeS mem (Binary oldval newval))); @@ -5693,7 +5739,7 @@ instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeI mem (Binary oldval newval))); @@ -5715,7 +5761,7 @@ instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeL mem (Binary oldval newval))); @@ -5737,7 +5783,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); @@ -5759,7 +5805,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (CompareAndExchangeP mem (Binary oldval newval))); @@ -5910,7 +5956,7 @@ instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP ne instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); @@ -5936,7 +5982,7 @@ instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); @@ -5961,7 +6007,7 @@ instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); 
match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); @@ -5983,7 +6029,7 @@ instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); @@ -6005,7 +6051,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); @@ -6027,7 +6073,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); @@ -6109,7 +6155,7 @@ instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetI mem newv)); @@ -6126,7 +6172,7 @@ instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetL mem newv)); @@ -6143,7 +6189,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetN mem newv)); @@ -6160,7 +6206,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set prev (GetAndSetP mem newv)); @@ -6305,7 +6351,7 @@ instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddL mem incr)); @@ -6321,7 +6367,7 @@ instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %} instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddL mem incr)); @@ -6338,7 +6384,7 @@ instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddL mem incr)); @@ -6355,7 +6401,7 @@ instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) %{ - predicate(n->as_LoadStore()->result_not_used() && 
needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddL mem incr)); @@ -6372,7 +6418,7 @@ instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddI mem incr)); @@ -6389,7 +6435,7 @@ instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddI mem incr)); @@ -6406,7 +6452,7 @@ instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_reserved(n)); match(Set newval (GetAndAddI mem incr)); @@ -6423,7 +6469,7 @@ instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) %{ - predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); + predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); match(Set dummy (GetAndAddI mem incr)); @@ -6456,6 +6502,7 @@ instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6471,6 +6518,7 @@ instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); int32_t con = (int32_t)$src2$$constant; __ addiw(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6487,6 +6535,7 @@ instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ addiw(as_Register($dst$$reg), as_Register($src1$$reg), $src2$$constant); @@ -6503,6 +6552,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6512,12 +6562,13 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ %} // If we shift more than 32 bits, we need not convert I2L. 
-instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegIorL2I src, uimmI6_ge32 scale) %{ +instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ match(Set dst (LShiftL (ConvI2L src) scale)); ins_cost(ALU_COST); format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); %} @@ -6533,6 +6584,7 @@ instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ add(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6549,6 +6601,7 @@ instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ add(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6564,6 +6617,7 @@ instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ add(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6581,6 +6635,7 @@ instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ subw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6597,6 +6652,7 @@ instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addiw __ subw(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6613,6 +6669,7 @@ instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sub(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -6628,6 +6685,7 @@ instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // src2 is imm, so actually call the addi __ sub(as_Register($dst$$reg), as_Register($src1$$reg), @@ -6687,21 +6745,6 @@ instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_pipe(imul_reg_reg); %} -instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ - match(Set dst (MulL (ConvI2L src1) (ConvI2L src2))); - ins_cost(IMUL_COST); - format %{ "mul $dst, $src1, $src2\t#@smulI" %} - - // Signed Multiply Long multiplies two 32-bit signed values to produce a 64-bit result. 
- ins_encode %{ - __ mul(as_Register($dst$$reg), - as_Register($src1$$reg), - as_Register($src2$$reg)); - %} - - ins_pipe(imul_reg_reg); -%} - // Long Multiply instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ @@ -6740,7 +6783,7 @@ instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_cost(IDIVSI_COST); format %{ "divw $dst, $src1, $src2\t#@divI"%} - ins_encode(riscv64_enc_divw(dst, src1, src2)); + ins_encode(riscv_enc_divw(dst, src1, src2)); ins_pipe(idiv_reg_reg); %} @@ -6762,7 +6805,7 @@ instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(IDIVDI_COST); format %{ "div $dst, $src1, $src2\t#@divL" %} - ins_encode(riscv64_enc_div(dst, src1, src2)); + ins_encode(riscv_enc_div(dst, src1, src2)); ins_pipe(ldiv_reg_reg); %} @@ -6772,6 +6815,7 @@ instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); %} ins_pipe(ialu_reg_shift); @@ -6784,7 +6828,7 @@ instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ins_cost(IDIVSI_COST); format %{ "remw $dst, $src1, $src2\t#@modI" %} - ins_encode(riscv64_enc_modw(dst, src1, src2)); + ins_encode(riscv_enc_modw(dst, src1, src2)); ins_pipe(ialu_reg_reg); %} @@ -6795,13 +6839,14 @@ instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(IDIVDI_COST); format %{ "rem $dst, $src1, $src2\t#@modL" %} - ins_encode(riscv64_enc_mod(dst, src1, src2)); + ins_encode(riscv_enc_mod(dst, src1, src2)); ins_pipe(ialu_reg_reg); %} // Integer Shifts // Shift Left Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (LShiftI src1 src2)); ins_cost(ALU_COST); @@ -6834,6 +6879,7 @@ instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Logical Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (URShiftI src1 src2)); ins_cost(ALU_COST); @@ -6866,6 +6912,7 @@ instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ %} // Shift Right Arithmetic Register +// In RV64I, only the low 5 bits of src2 are considered for the shift amount instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ match(Set dst (RShiftI src1 src2)); ins_cost(ALU_COST); @@ -6900,6 +6947,7 @@ instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ // Long Shifts // Shift Left Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (LShiftL src1 src2)); @@ -6923,6 +6971,7 @@ instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ slli(as_Register($dst$$reg), @@ -6934,6 +6983,7 @@ instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ %} // Shift Right Logical Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (URShiftL src1 src2)); @@ -6957,6 
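The comments added to the integer and long shift rules state the RV64 convention that register-operand shifts only consult the low bits of the shift amount: 5 bits for the 32-bit W forms, 6 bits for the 64-bit forms. A tiny emulation of that masking, for reference; the function names are made up:

    #include <cassert>
    #include <cstdint>

    static uint32_t sllw_like(uint32_t rs1, uint64_t rs2) { return rs1 << (rs2 & 0x1f); }  // W form: low 5 bits
    static uint64_t sll_like (uint64_t rs1, uint64_t rs2) { return rs1 << (rs2 & 0x3f); }  // 64-bit form: low 6 bits

    int main() {
      assert(sllw_like(1, 33) == 2);   // 33 & 0x1f == 1
      assert(sll_like (1, 65) == 2);   // 65 & 0x3f == 1
      return 0;
    }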
+7007,7 @@ instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srli(as_Register($dst$$reg), @@ -6975,6 +7026,7 @@ instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srli(as_Register($dst$$reg), @@ -6986,6 +7038,7 @@ instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ %} // Shift Right Arithmetic Register +// In RV64I, only the low 6 bits of src2 are considered for the shift amount instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ match(Set dst (RShiftL src1 src2)); @@ -7009,6 +7062,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); // the shift amount is encoded in the lower // 6 bits of the I-immediate field for RV64I __ srai(as_Register($dst$$reg), @@ -7019,8 +7073,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ins_pipe(ialu_reg_shift); %} -instruct regI_not_reg(iRegINoSp dst, - iRegIorL2I src1, immI_M1 m1) %{ +instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ match(Set dst (XorI src1 m1)); ins_cost(ALU_COST); format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} @@ -7032,8 +7085,7 @@ instruct regI_not_reg(iRegINoSp dst, ins_pipe(ialu_reg); %} -instruct regL_not_reg(iRegLNoSp dst, - iRegL src1, immL_M1 m1) %{ +instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ match(Set dst (XorL src1 m1)); ins_cost(ALU_COST); format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} @@ -7409,6 +7461,44 @@ instruct negD_reg_reg(fRegD dst, fRegD src) %{ ins_pipe(fp_uop_d); %} +instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 3); + format %{ + "sraiw t0, $src, 0x1f\n\t" + "addw $dst, $src, t0\n\t" + "xorr $dst, $dst, t0\t#@absI_reg" + %} + + ins_encode %{ + __ sraiw(t0, as_Register($src$$reg), 0x1f); + __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); + __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct absL_reg(iRegLNoSp dst, iRegL src) %{ + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 3); + format %{ + "srai t0, $src, 0x3f\n\t" + "add $dst, $src, t0\n\t" + "xorr $dst, $dst, t0\t#@absL_reg" + %} + + ins_encode %{ + __ srai(t0, as_Register($src$$reg), 0x3f); + __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); + __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + instruct absF_reg(fRegF dst, fRegF src) %{ match(Set dst (AbsF src)); @@ -7436,7 +7526,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ %} instruct sqrtF_reg(fRegF dst, fRegF src) %{ - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + match(Set dst (SqrtF src)); ins_cost(FSQRT_COST); format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -7467,13 +7557,14 @@ instruct sqrtD_reg(fRegD dst, fRegD src) %{ // Logical Instructions // Register And -instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ 
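The new absI_reg/absL_reg rules use the classic three-instruction branchless absolute value spelled out in their format strings: arithmetic-shift the sign bit into a mask, add the mask, then XOR with it. The same computation in C++ (assumes the usual two's-complement arithmetic right shift for signed values):

    #include <cassert>
    #include <cstdint>

    static int64_t abs_branchless(int64_t x) {
      int64_t t = x >> 63;       // 0 for x >= 0, -1 for x < 0  (srai t0, src, 0x3f)
      return (x + t) ^ t;        // identity for x >= 0, two's-complement negate otherwise
    }

    int main() {
      assert(abs_branchless(7)  == 7);
      assert(abs_branchless(-7) == 7);
      assert(abs_branchless(0)  == 0);
      return 0;
    }

Separately, sqrtF_reg now matches SqrtF directly rather than the ConvD2F(SqrtD(ConvF2D src)) detour, keeping single-precision square roots in single precision.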
match(Set dst (AndI src1 src2)); format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7483,13 +7574,14 @@ instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate And -instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (AndI src1 src2)); format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andi(as_Register($dst$$reg), as_Register($src1$$reg), (int32_t)($src2$$constant)); @@ -7499,13 +7591,14 @@ instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ %} // Register Or -instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ match(Set dst (OrI src1 src2)); format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ orr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7515,7 +7608,7 @@ instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate Or -instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (OrI src1 src2)); format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} @@ -7531,13 +7624,14 @@ instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ %} // Register Xor -instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ +instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ match(Set dst (XorI src1 src2)); format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ xorr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7547,7 +7641,7 @@ instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ %} // Immediate Xor -instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ +instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ match(Set dst (XorI src1 src2)); format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} @@ -7570,6 +7664,7 @@ instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7586,6 +7681,7 @@ instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ andi(as_Register($dst$$reg), as_Register($src1$$reg), (int32_t)($src2$$constant)); @@ -7602,6 +7698,7 @@ instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ orr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7634,6 +7731,7 @@ instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ins_cost(ALU_COST); ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ xorr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -7661,29 +7759,29 @@ instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ // 
============================================================================ // BSWAP Instructions -instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ +instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ match(Set dst (ReverseBytesI src)); effect(TEMP cr); - ins_cost(ALU_COST * 17); - format %{ "grevw $dst, $src\t#@bytes_reverse_int" %} + ins_cost(ALU_COST * 13); + format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ins_encode %{ - __ grevw(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); %} -instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ +instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ match(Set dst (ReverseBytesL src)); effect(TEMP cr); - ins_cost(ALU_COST * 45); - format %{ "grev $dst, $src\t#@bytes_reverse_long" %} + ins_cost(ALU_COST * 29); + format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ins_encode %{ - __ grev(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7693,10 +7791,10 @@ instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (ReverseBytesUS src)); ins_cost(ALU_COST * 5); - format %{ "grevhu $dst, $src\t#@bytes_reverse_unsigned_short" %} + format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ins_encode %{ - __ grevhu(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7706,10 +7804,10 @@ instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ match(Set dst (ReverseBytesS src)); ins_cost(ALU_COST * 5); - format %{ "grevh $dst, $src\t#@bytes_reverse_short" %} + format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ins_encode %{ - __ grevh(as_Register($dst$$reg), as_Register($src$$reg)); + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); %} ins_pipe(ialu_reg); @@ -7834,6 +7932,7 @@ instruct castX2P(iRegPNoSp dst, iRegL src) %{ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if ($dst$$reg != $src$$reg) { __ mv(as_Register($dst$$reg), as_Register($src$$reg)); } @@ -7849,6 +7948,7 @@ instruct castP2X(iRegLNoSp dst, iRegP src) %{ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if ($dst$$reg != $src$$reg) { __ mv(as_Register($dst$$reg), as_Register($src$$reg)); } @@ -7894,7 +7994,7 @@ instruct checkCastPP(iRegPNoSp dst) // Convert Instructions // int to bool -instruct convI2Bool(iRegINoSp dst, iRegIorL2I src) +instruct convI2Bool(iRegINoSp dst, iRegI src) %{ match(Set dst (Conv2B src)); @@ -7930,7 +8030,7 @@ instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) match(Set dst (ConvI2L src)); ins_cost(ALU_COST); - format %{ "addw $dst, $src\t#@convI2L_reg_reg" %} + format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ins_encode %{ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); %} @@ -7950,18 +8050,17 @@ instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ins_pipe(ialu_reg); %} -// unsigned int to long (Zero-extend) -// this pattern occurs in bigmath arithmetic -instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +// int to unsigned long (Zero-extend) +instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ match(Set dst (AndL (ConvI2L src) mask)); ins_cost(ALU_COST * 2); - 
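The BSWAP rules in this hunk move from the grev* helpers to revb* macro-assembler sequences with lower advertised costs. Assuming revb_w_w reverses the bytes of the low 32-bit word and sign-extends the result (which is what ReverseBytesI needs), its effect can be modeled as below; the helper name and its exact instruction sequence are not reproduced here.

    #include <cassert>
    #include <cstdint>

    static int64_t reverse_bytes_int_model(int64_t x) {
      uint32_t reversed = __builtin_bswap32((uint32_t)x);   // GCC/Clang builtin
      return (int64_t)(int32_t)reversed;                    // sign-extend the reversed word
    }

    int main() {
      assert((uint32_t)reverse_bytes_int_model(0x11223344) == 0x44332211u);
      return 0;
    }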
format %{ "slli $dst, $src, 32\t# ui2l\n\t" - "srli $dst, $dst, 32\t# ui2l, #@convUI2L_reg_reg" %} + format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ins_encode %{ - __ zero_ext(as_Register($dst$$reg), as_Register($src$$reg), 32); + Assembler::CompressibleRegion cr(&_masm); + __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); %} ins_pipe(ialu_reg_shift); @@ -8111,15 +8210,12 @@ instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ instruct convP2I(iRegINoSp dst, iRegP src) %{ match(Set dst (ConvL2I (CastP2X src))); - ins_cost(ALU_COST); - format %{ "mv $dst, $src\t# ptr -> int\n\t" - "slli $dst, $dst, 32\n\t" - "srli $dst, $dst, 32\t#@convP2I" - %} + ins_cost(ALU_COST * 2); + format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ins_encode %{ - __ mv($dst$$Register, $src$$Register); - __ clear_upper_bits($dst$$Register, 32); + Assembler::CompressibleRegion cr(&_masm); + __ zero_extend($dst$$Register, $src$$Register, 32); %} ins_pipe(ialu_reg); @@ -8136,6 +8232,7 @@ instruct convN2I(iRegINoSp dst, iRegN src) format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ mv($dst$$Register, $src$$Register); %} @@ -8201,26 +8298,7 @@ instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ins_pipe(ialu_reg); %} -instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ - predicate(!maybe_use_tmp_register_decoding_klass()); - - match(Set dst (DecodeNKlass src)); - - ins_cost(ALU_COST); - format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} - - ins_encode %{ - Register src_reg = as_Register($src$$reg); - Register dst_reg = as_Register($dst$$reg); - __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0); - %} - - ins_pipe(ialu_reg); -%} - -instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ - predicate(maybe_use_tmp_register_decoding_klass()); - +instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ match(Set dst (DecodeNKlass src)); effect(TEMP tmp); @@ -8251,6 +8329,7 @@ instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); %} @@ -8287,6 +8366,7 @@ instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); %} @@ -8305,6 +8385,7 @@ instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); %} @@ -8341,6 +8422,7 @@ instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); %} @@ -8359,6 +8441,7 @@ instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); %} @@ -8377,6 +8460,7 @@ instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); __ sd(as_Register($src$$reg), 
Address(sp, $dst$$disp)); %} @@ -8515,7 +8599,7 @@ instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) ins_pipe(pipe_class_default); %} -instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q) +instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) %{ match(Set dst (CmpLTMask p q)); @@ -8623,7 +8707,7 @@ instruct branch(label lbl) ins_cost(BRANCH_COST); format %{ "j $lbl\t#@branch" %} - ins_encode(riscv64_enc_j(lbl)); + ins_encode(riscv_enc_j(lbl)); ins_pipe(pipe_branch); %} @@ -9573,7 +9657,7 @@ instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, lab format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9588,7 +9672,7 @@ instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9672,7 +9756,7 @@ instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, la format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9687,7 +9771,7 @@ instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, labe format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} - ins_encode(riscv64_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ins_pipe(pipe_cmpz_branch); %} @@ -9807,7 +9891,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" "mv $dst, $src\n\t" "skip:" - %} + %} ins_encode %{ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, @@ -9898,7 +9982,8 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) // Procedure Call/Return Instructions // Call Java Static Instruction - +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); @@ -9909,15 +9994,18 @@ instruct CallStaticJavaDirect(method meth) format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} - ins_encode( riscv64_enc_java_static_call(meth), - riscv64_enc_call_epilog ); + ins_encode(riscv_enc_java_static_call(meth), + riscv_enc_call_epilog); ins_pipe(pipe_class_call); + ins_alignment(4); %} // TO HERE // Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
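The cmpLTMask_reg_reg rule near the top of this hunk implements C2's CmpLTMask node, which produces an all-ones mask when p < q and zero otherwise; the contract, stated as scalar code (the emitted instruction sequence is not shown in this hunk):

    #include <cassert>
    #include <cstdint>

    static int32_t cmp_lt_mask(int32_t p, int32_t q) {
      return (p < q) ? -1 : 0;   // -1 is the all-ones mask
    }

    int main() {
      assert(cmp_lt_mask(1, 2) == -1);
      assert(cmp_lt_mask(2, 1) == 0);
      return 0;
    }

The ins_alignment(4) added to CallStaticJavaDirect pairs with the note about ret_addr_offset() and compute_padding(), presumably so the call site keeps a fixed, 4-byte-aligned return-address offset that those functions can rely on.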
instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) %{ match(CallDynamicJava); @@ -9928,10 +10016,11 @@ instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} - ins_encode( riscv64_enc_java_dynamic_call(meth), - riscv64_enc_call_epilog ); + ins_encode(riscv_enc_java_dynamic_call(meth), + riscv_enc_call_epilog); ins_pipe(pipe_class_call); + ins_alignment(4); %} // Call Runtime Instruction @@ -9946,7 +10035,7 @@ instruct CallRuntimeDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9963,7 +10052,7 @@ instruct CallLeafDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9980,7 +10069,7 @@ instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} - ins_encode( riscv64_enc_java_to_runtime(meth) ); + ins_encode(riscv_enc_java_to_runtime(meth)); ins_pipe(pipe_class_call); %} @@ -9993,31 +10082,31 @@ instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) // gen_subtype_check()). Return zero for a hit. The encoding // ALSO sets flags. -instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) +instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) %{ match(Set result (PartialSubtypeCheck sub super)); - effect(KILL temp, KILL cr); + effect(KILL tmp, KILL cr); ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} - ins_encode(riscv64_enc_partial_subtype_check(sub, super, temp, result)); + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); opcode(0x1); // Force zero of result reg on hit ins_pipe(pipe_class_memory); %} -instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, - immP0 zero, rFlagsReg cr) +instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, + immP0 zero, rFlagsReg cr) %{ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect(KILL temp, KILL result); + effect(KILL tmp, KILL result); ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} - ins_encode(riscv64_enc_partial_subtype_check(sub, super, temp, result)); + ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); opcode(0x0); // Don't zero result reg on hit @@ -10028,7 +10117,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} @@ -10046,7 +10135,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 
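The string_compare* rules differ only in the operand encodings from StrIntrinsicNode: LL compares two Latin-1 (byte) arrays, UU two UTF-16 (char) arrays, and LU/UL one of each. A scalar sketch of the mixed LU case, with hypothetical names, widening the byte-encoded operand before comparing code units:

    #include <algorithm>
    #include <cstdint>

    static int compare_LU(const uint8_t* s1, int len1, const uint16_t* s2, int len2) {
      int n = std::min(len1, len2);
      for (int i = 0; i < n; ++i) {
        int d = (int)s1[i] - (int)s2[i];   // Latin-1 byte widened to a UTF-16 code unit
        if (d != 0) return d;
      }
      return len1 - len2;                  // shorter string compares lower on a common prefix
    }

    int main() {
      const uint8_t  a[] = { 'a', 'b' };
      const uint16_t b[] = { 'a', 'c' };
      return compare_LU(a, 2, b, 2) < 0 ? 0 : 1;
    }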
str2, iRegI_R iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} @@ -10063,7 +10152,7 @@ instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} @@ -10081,7 +10170,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ rFlagsReg cr) %{ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); - match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} @@ -10095,15 +10184,15 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ins_encode %{ __ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, @@ -10116,15 +10205,15 @@ instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + TEMP tmp1, TEMP 
tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ins_encode %{ __ string_indexof($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, @@ -10137,13 +10226,13 @@ instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, - iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, + iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ins_encode %{ @@ -10158,13 +10247,14 @@ instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ %} instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ins_encode %{ @@ -10179,15 +10269,15 @@ instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, %} instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) + immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ins_encode %{ int icnt2 = (int)$int_cnt2$$constant; __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -10200,15 +10290,15 @@ instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, %} instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, - immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) + immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) %{ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrIndexOf 
(Binary str1 cnt1) (Binary str2 int_cnt2))); effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); - format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ins_encode %{ int icnt2 = (int)$int_cnt2$$constant; __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -10220,8 +10310,26 @@ instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ins_pipe(pipe_class_memory); %} +instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} + ins_pipe(pipe_class_memory); +%} + + // clearing of an array -instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) %{ match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base); @@ -10242,8 +10350,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlag instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) %{ - predicate((uint64_t)n->in(2)->get_long() - < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); match(Set dummy (ClearArray cnt base)); effect(USE_KILL base, KILL cr); @@ -10291,34 +10398,34 @@ instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) %{ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} ins_encode %{ __ arrays_equals($ary1$$Register, $ary2$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, - $result$$Register, $tmp$$Register, 1); + $result$$Register, $tmp5$$Register, 1); %} ins_pipe(pipe_class_memory); %} instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) %{ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (AryEq ary1 ary2)); - effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + effect(USE_KILL ary1, USE_KILL ary2, TEMP 
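clearArray_imm_reg is only selected when the constant length is below BlockZeroingLowLimit shifted right by LogBytesPerWord; my reading (an assumption, since the units are not spelled out here) is that ClearArray's count is in words while BlockZeroingLowLimit is in bytes, so the shift converts the limit into the same unit before comparing:

    #include <cstdint>

    // Assumes cnt is in words, the limit is in bytes, and LogBytesPerWord == 3 on RV64.
    static bool use_constant_length_clear(uint64_t cnt_words, uint64_t limit_bytes) {
      return cnt_words < (limit_bytes >> 3);
    }

    int main() {
      return use_constant_length_clear(8, 256) ? 0 : 1;   // 8 words < 32 words
    }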
tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} ins_encode %{ __ arrays_equals($ary1$$Register, $ary2$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, - $result$$Register, $tmp$$Register, 2); + $result$$Register, $tmp5$$Register, 2); %} ins_pipe(pipe_class_memory); %} @@ -10359,49 +10466,31 @@ instruct tlsLoadP(javaThread_RegP dst) ins_pipe(pipe_class_empty); %} -// Thread refetch: -// take two main arguments: -// 1. register @rthread -// 2. one register which contains the `Coroutine *` -// and move Coroutine->_thread to @rthread -instruct tlsRefetchP(javaThread_RegP dst, iRegP src) -%{ - match(Set dst (ThreadRefetch src)); - - format %{ "Refetch the rthread register" %} - - ins_encode %{ - __ ld(xthread, Address($src$$Register, Coroutine::thread_offset())); - %} - - ins_pipe(pipe_class_empty); -%} - // inlined locking and unlocking // using t1 as the 'flag' register to bridge the BoolNode producers and consumers -instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP tmp2); + effect(TEMP tmp1, TEMP tmp2); ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); - format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} + format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} - ins_encode(riscv64_enc_fast_lock(object, box, tmp, tmp2)); + ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); ins_pipe(pipe_serial); %} // using t1 as the 'flag' register to bridge the BoolNode producers and consumers -instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ match(Set cr (FastUnlock object box)); - effect(TEMP tmp, TEMP tmp2); + effect(TEMP tmp1, TEMP tmp2); ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); - format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} + format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} - ins_encode(riscv64_enc_fast_unlock(object, box, tmp, tmp2)); + ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); ins_pipe(pipe_serial); %} @@ -10418,7 +10507,7 @@ instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} - ins_encode(riscv64_enc_tail_call(jump_target)); + ins_encode(riscv_enc_tail_call(jump_target)); ins_pipe(pipe_class_call); %} @@ -10431,7 +10520,7 @@ instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." 
%} - ins_encode(riscv64_enc_tail_jmp(jump_target)); + ins_encode(riscv_enc_tail_jmp(jump_target)); ins_pipe(pipe_class_call); %} @@ -10463,13 +10552,13 @@ instruct RethrowException() format %{ "j rethrow_stub\t#@RethrowException" %} - ins_encode( riscv64_enc_rethrow() ); + ins_encode(riscv_enc_rethrow()); ins_pipe(pipe_class_call); %} // Return Instruction -// epilog node loads ret address into lr as part of frame pop +// epilog node loads ret address into ra as part of frame pop instruct Ret() %{ match(Return); @@ -10477,7 +10566,7 @@ instruct Ret() ins_cost(BRANCH_COST); format %{ "ret\t// return register, #@Ret" %} - ins_encode(riscv64_enc_ret()); + ins_encode(riscv_enc_ret()); ins_pipe(pipe_branch); %} @@ -10491,6 +10580,7 @@ instruct ShouldNotReachHere() %{ format %{ "#@ShouldNotReachHere" %} ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); if (is_reachable()) { __ halt(); } diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 index 00000000000..b9e04c432e1 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad @@ -0,0 +1,451 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// +// + +// RISCV Bit-Manipulation Extension Architecture Description File + +// Convert oop into int for vectors alignment masking +instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + match(Set dst (ConvL2I (CastP2X src))); + + format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// byte to int +instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ + predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to short +instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ + predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + + format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// short to unsigned int +instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ + predicate(UseRVB); + match(Set dst (AndI src mask)); + + format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// int to unsigned long (zero extend) +instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ + predicate(UseRVB); + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ + __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg_shift); +%} + +// BSWAP instructions +instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); + format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); + format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Shift Add Pointer +instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + 
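Several of the new riscv_b.ad rules match shift-pair idioms that C2 produces for narrowing conversions, for example convB2I_reg_reg_rvb turns (RShiftI (LShiftI src 24) 24) into a single sext.b. The shift pair really is a byte sign-extension, as the standalone check below shows (it relies on the usual two's-complement arithmetic-shift behavior):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = -1000; x <= 1000; ++x) {
        int32_t via_shifts = (int32_t)((uint32_t)x << 24) >> 24;   // shift up, arithmetic shift down
        int32_t via_cast   = (int32_t)(int8_t)(uint8_t)x;          // direct byte sign-extension
        assert(via_shifts == via_cast);
      }
      return 0;
    }

The i2s, s2ui and i2ul variants follow the same pattern with sext.h, zext.h and zext.w.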
ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Shift Add Long +instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ + predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); + format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), + as_Register($src2$$reg), + as_Register($src1$$reg), + t0, + $imm$$constant); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Zeros Count instructions +instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); + format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); + format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); + format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); + format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Population Count instructions +instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); + format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Note: Long/bitCount(long) returns an int. 
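The shadd rules fold an address-style shift-and-add, dst = src1 + (src2 << imm), into one Zba sh1add/sh2add/sh3add instruction instead of a separate shift and add. The arithmetic being matched:

    #include <cassert>
    #include <cstdint>

    static uint64_t shadd_like(uint64_t base, uint64_t index, unsigned scale) {
      return base + (index << scale);   // scale is the element-size log2
    }

    int main() {
      assert(shadd_like(0x1000, 5, 3) == 0x1028);   // element 5 of an 8-byte-element array
      return 0;
    }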
+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); + format %{ "cpop $dst, $src\t#@popCountL_rvb" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} + + ins_pipe(ialu_reg); +%} + +// Max and Min +instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); + format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ + predicate(UseRVB); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); + format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Abs +instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ + predicate(UseRVB); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" + "max $dst, $src, t0\t#@absI_reg_rvb" + %} + + ins_encode %{ + __ negw(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ + predicate(UseRVB); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" + "max $dst, $src, t0\t#@absL_reg_rvb" + %} + + ins_encode %{ + __ neg(t0, as_Register($src$$reg)); + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + + ins_pipe(ialu_reg); +%} + +// And Not +instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Or Not +instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + +instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ + predicate(UseRVB); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); + format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index b5d2df2deb7..dc3ac548d73 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ 
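Two Zbb idioms appear just above: absI_reg_rvb/absL_reg_rvb compute an absolute value as max(x, -x), and andnI/andnL (and likewise orn) match x op (y ^ -1), that is x op ~y, so the NOT folds into a single andn/orn. Both identities, checked in plain C++:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = -42;
      int64_t y = 0b1010;
      assert(std::max(x, -x) == 42);            // abs via neg + max
      assert((x & (y ^ -1)) == (x & ~y));       // XOR with -1 is bitwise NOT
      assert((x | (y ^ -1)) == (x | ~y));
      return 0;
    }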
-1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -71,7 +71,7 @@ class SimpleRuntimeFrame { // The frame sender code expects that fp will be in the "natural" place and // will override any oopMap setting for it. We must therefore force the layout // so that it agrees with the frame sender code. - // we don't expect any arg reg save area so riscv64 asserts that + // we don't expect any arg reg save area so riscv asserts that // frame::arg_reg_save_area_bytes == 0 fp_off = 0, fp_off2, return_off, return_off2, @@ -81,57 +81,56 @@ class SimpleRuntimeFrame { class RegisterSaver { public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - static void restore_live_registers(MacroAssembler* masm); + RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); // Offsets into the register save area // Used by deoptimization when it is managing result register // values on its own - // gregs:30, float_register:32; except: x1(ra) & x2(sp) + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) // |---f0---|<---SP // |---f1---| // | .. | // |---f31--| - // |---zr---| - // |---x3---| - // | x4 | + // |---reserved slot for stack alignment---| + // |---x5---| + // | x6 | // |---.. --| // |---x31--| // |---fp---| // |---ra---| - static int reg_offset_in_bytes(Register r) { - assert (r->encoding() > 2, "ra and sp not saved"); - return (32 /* floats*/ + r->encoding() - 2 /* x1, x2*/) * wordSize; + int f0_offset_in_bytes(void) { + return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + + FloatRegisterImpl::max_slots_per_register * + FloatRegisterImpl::number_of_registers * + BytesPerInt; + } + + int reg_offset_in_bytes(Register r) { + assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + + int freg_offset_in_bytes(FloatRegister f) { + return f0_offset_in_bytes() + f->encoding() * wordSize; + } + + int ra_offset_in_bytes(void) { + return reserved_slot_offset_in_bytes() + + (RegisterImpl::number_of_registers - 3) * + RegisterImpl::max_slots_per_register * + BytesPerInt; } - static int x10_offset_in_bytes(void) { return reg_offset_in_bytes(x10); } // x10 - static int xmethod_offset_in_bytes(void) { return reg_offset_in_bytes(xmethod); } // x31 - static int tmp0_offset_in_bytes(void) { return reg_offset_in_bytes(t0); } // x5 - static int f0_offset_in_bytes(void) { return 0; } - static int f10_offset_in_bytes(void) { return 10 /* floats*/ * wordSize; } - static int return_offset_in_bytes(void) { return return_off * BytesPerInt; } - - // During deoptimization only the result registers need to be restored, - // all the other values have already been extracted. 
- static void restore_result_registers(MacroAssembler* masm); - - // Capture info about frame layout - enum layout { - fpu_state_off = 0, - fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1, - // The frame sender code expects that fp will be in - // the "natural" place and will override any oopMap - // setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. - x0_off = fpu_state_off + FPUStateSizeInWords, - fp_off = x0_off + 30 * 2, - return_off = fp_off + 2, // slot for return address - reg_save_size = return_off + 2 - }; }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { assert_cond(masm != NULL && total_frame_words != NULL); - int frame_size_in_bytes = align_up(additional_frame_words * wordSize + reg_save_size * BytesPerInt, 16); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); // OopMap frame size is in compiler stack slots (jint's) not bytes or words int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; // The caller will allocate additional_frame_words @@ -153,20 +152,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); assert_cond(oop_maps != NULL && oop_map != NULL); - // ignore zr, ra and sp, being ignored also by push_CPU_state (pushing zr only for stack alignment) - for (int i = 3; i < RegisterImpl::number_of_registers; i++) { - Register r = as_Register(i); - if (r != xthread && r != t0 && r != t1) { - int sp_offset = 2 * ((i - 2) + 32); // SP offsets are in 4-byte words, register slots are 8 bytes - // wide, 32 floating-point registers - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg()); - } - } + int sp_offset_in_slots = 0; + int step_in_slots = 0; - for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { FloatRegister r = as_FloatRegister(i); - int sp_offset = 2 * i; - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); + } + + step_in_slots = RegisterImpl::max_slots_per_register; + // skip the slot reserved for alignment, see MacroAssembler::push_reg; + // also skip x5 ~ x6 on the stack because they are caller-saved registers. + sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; + // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. + for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { + Register r = as_Register(i); + if (r != xthread) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); + } } return oop_map; @@ -178,37 +182,18 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) { __ leave(); } -void RegisterSaver::restore_result_registers(MacroAssembler* masm) { - // Just restore result register. Only used by deoptimization. By - // now any callee save register that needs to be restored to a c2 - // caller of the deoptee has been extracted into the vframeArray - // and will be stuffed into the c2i adapter we create for later - // restoration so only result registers need to be restored here. 
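The reworked RegisterSaver derives every offset from the layout drawn in the comment: the 32 FP registers at the bottom, one reserved alignment slot, then x5..x31 (x0..x4 are not pushed by push_CPU_state), with fp and ra from the frame link on top. Worked numbers under the assumptions that there are 32 integer and 32 FP registers, max_slots_per_register is 2, BytesPerInt is 4 and wordSize is 8:

    // f0_offset            = 0
    // reserved_slot_offset = 0 + 2 * 32 * 4         = 256
    // reg_offset(x5)       = 256 + (5 - 4) * 8      = 264
    // ra_offset            = 256 + (32 - 3) * 2 * 4 = 488
    constexpr int f0_offset            = 0;
    constexpr int reserved_slot_offset = f0_offset + 2 * 32 * 4;
    constexpr int x5_offset            = reserved_slot_offset + (5 - 4) * 8;
    constexpr int ra_offset            = reserved_slot_offset + (32 - 3) * 2 * 4;

    static_assert(reserved_slot_offset == 256, "FP save area is 32 * 8 bytes");
    static_assert(ra_offset == 488, "ra sits at the top of the integer save area");

    int main() { return x5_offset == 264 ? 0 : 1; }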
- assert_cond(masm != NULL); - // Restore fp result register - __ fld(f10, Address(sp, f10_offset_in_bytes())); - // Restore integer result register - __ ld(x10, Address(sp, x10_offset_in_bytes())); - - // Pop all of the register save are off the stack - __ add(sp, sp, align_up(return_offset_in_bytes(), 16)); -} - // Is vector's size (in bytes) bigger than a size saved by default? -// 8 bytes vector registers are saved by default on riscv64. bool SharedRuntime::is_wide_vector(int size) { - return size > 8; + return false; } size_t SharedRuntime::trampoline_size() { - // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction - return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr + return 6 * NativeInstruction::instruction_size; } void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { - assert_cond(masm != NULL); int32_t offset = 0; - __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli + __ movptr_with_offset(t0, destination, offset); __ jalr(x0, t0, offset); } @@ -217,9 +202,9 @@ void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destinatio // (like the placement of the register window) the slots must be biased by // the following value. static int reg2offset_in(VMReg r) { - // Account for saved fp and lr + // Account for saved fp and ra // This should really be in_preserve_stack_slots - return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; + return r->reg2stack() * VMRegImpl::stack_slot_size; } static int reg2offset_out(VMReg r) { @@ -341,11 +326,15 @@ static void patch_callers_callsite(MacroAssembler *masm) { #endif __ mv(c_rarg0, xmethod); - __ mv(c_rarg1, lr); + __ mv(c_rarg1, ra); int32_t offset = 0; __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); __ jalr(x1, t0, offset); - __ ifence(); + + // Explicit fence.i required because fixup_callers_callsite may change the code + // stream. + __ safepoint_ifence(); + __ pop_CPU_state(); // restore sp __ leave(); @@ -644,7 +633,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, int total_args_passed) { - assert(regs2 == NULL, "not needed on riscv64"); + assert(regs2 == NULL, "not needed on riscv"); // We return the amount of VMRegImpl stack slots we need to reserve for all // the arguments NOT counting out_preserve_stack_slots. 
@@ -910,14 +899,14 @@ void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, // which by this time is free to use switch (ret_type) { case T_FLOAT: - __ fsw(f10, Address(fp, -wordSize)); + __ fsw(f10, Address(fp, -3 * wordSize)); break; case T_DOUBLE: - __ fsd(f10, Address(fp, -wordSize)); + __ fsd(f10, Address(fp, -3 * wordSize)); break; case T_VOID: break; default: { - __ sd(x10, Address(fp, -wordSize)); + __ sd(x10, Address(fp, -3 * wordSize)); } } } @@ -928,14 +917,14 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty // which by this time is free to use switch (ret_type) { case T_FLOAT: - __ flw(f10, Address(fp, -wordSize)); + __ flw(f10, Address(fp, -3 * wordSize)); break; case T_DOUBLE: - __ fld(f10, Address(fp, -wordSize)); + __ fld(f10, Address(fp, -3 * wordSize)); break; case T_VOID: break; default: { - __ ld(x10, Address(fp, -wordSize)); + __ ld(x10, Address(fp, -3 * wordSize)); } } } @@ -975,87 +964,7 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR } } -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an -// OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler* masm, - int stack_slots, - int total_c_args, - int total_in_args, - int arg_save_area, - OopMapSet* oop_maps, - VMRegPair* in_regs, - BasicType* in_sig_bt) { Unimplemented(); } - -// Unpack an array argument into a pointer to the body and the length -// if the array is non-null, otherwise pass 0 for both. -static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } - -class ComputeMoveOrder: public StackObj { - class MoveOperation: public ResourceObj { - friend class ComputeMoveOrder; - private: - VMRegPair _src; - VMRegPair _dst; - int _src_index; - int _dst_index; - bool _processed; - MoveOperation* _next; - MoveOperation* _prev; - - static int get_id(VMRegPair r) { Unimplemented(); return 0; } - - public: - MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): - _src(src) - , _dst(dst) - , _src_index(src_index) - , _dst_index(dst_index) - , _processed(false) - , _next(NULL) - , _prev(NULL) { Unimplemented(); } - - ~MoveOperation() { - _next = NULL; - _prev = NULL; - } - - VMRegPair src() const { Unimplemented(); return _src; } - int src_id() const { Unimplemented(); return 0; } - int src_index() const { Unimplemented(); return 0; } - VMRegPair dst() const { Unimplemented(); return _src; } - void set_dst(int i, VMRegPair dst) { Unimplemented(); } - int dst_index() const { Unimplemented(); return 0; } - int dst_id() const { Unimplemented(); return 0; } - MoveOperation* next() const { Unimplemented(); return 0; } - MoveOperation* prev() const { Unimplemented(); return 0; } - void set_processed() { Unimplemented(); } - bool is_processed() const { Unimplemented(); return 0; } - - // insert - void break_cycle(VMRegPair temp_register) { Unimplemented(); } - - void link(GrowableArray& killer) { Unimplemented(); } - }; - - private: - GrowableArray edges; - - public: - ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, - BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } - - ~ComputeMoveOrder() {} - // Collected all the move operations - void add_edge(int src_index, 
VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } - - // Walk the edges breaking cycles between moves. The result list - // can be walked in order to produce the proper set of loads - GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -}; - -static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) { +static void rt_call(MacroAssembler* masm, address dest) { assert_cond(masm != NULL); CodeBlob *cb = CodeCache::find_blob(dest); if (cb) { @@ -1064,7 +973,6 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int32_t offset = 0; __ la_patchable(t0, RuntimeAddress(dest), offset); __ jalr(x1, t0, offset); - __ ifence(); } } @@ -1208,12 +1116,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(-1), (OopMapSet*)NULL); } - bool is_critical_native = true; - address native_func = critical_entry; - if (native_func == NULL) { - native_func = method->native_function(); - is_critical_native = false; - } + address native_func = method->native_function(); assert(native_func != NULL, "must have function"); // An OopMap for lock (and class if static) @@ -1228,70 +1131,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args; - if (!is_critical_native) { - total_c_args += 1; - if (method->is_static()) { - total_c_args++; - } - } else { - for (int i = 0; i < total_in_args; i++) { - if (in_sig_bt[i] == T_ARRAY) { - total_c_args++; - } - } - } + int total_c_args = total_in_args + (method->is_static() ? 2 : 1); BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); - assert_cond(out_sig_bt != NULL && out_regs != NULL); BasicType* in_elem_bt = NULL; int argc = 0; - if (!is_critical_native) { - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } - for (int i = 0; i < total_in_args ; i++) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - } else { - Thread* THREAD = Thread::current(); - in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); - assert_cond(in_elem_bt != NULL); - SignatureStream ss(method->signature()); - for (int i = 0; i < total_in_args ; i++) { - if (in_sig_bt[i] == T_ARRAY) { - // Arrays are passed as int, elem* pair - out_sig_bt[argc++] = T_INT; - out_sig_bt[argc++] = T_ADDRESS; - Symbol* atype = ss.as_symbol(CHECK_NULL); - const char* at = atype->as_C_string(); - if (strlen(at) == 2) { - assert(at[0] == '[', "must be"); - switch (at[1]) { - case 'B': in_elem_bt[i] = T_BYTE; break; - case 'C': in_elem_bt[i] = T_CHAR; break; - case 'D': in_elem_bt[i] = T_DOUBLE; break; - case 'F': in_elem_bt[i] = T_FLOAT; break; - case 'I': in_elem_bt[i] = T_INT; break; - case 'J': in_elem_bt[i] = T_LONG; break; - case 'S': in_elem_bt[i] = T_SHORT; break; - case 'Z': in_elem_bt[i] = T_BOOLEAN; break; - default: ShouldNotReachHere(); - } - } - } else { - out_sig_bt[argc++] = in_sig_bt[i]; - in_elem_bt[i] = T_VOID; - } - if (in_sig_bt[i] != T_VOID) { - assert(in_sig_bt[i] == ss.type(), "must match"); - ss.next(); - } - } + for (int i = 0; i < total_in_args ; i++) { + out_sig_bt[argc++] = in_sig_bt[i]; } // Now figure out where the args must be stored and how much stack 
space @@ -1308,34 +1161,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now the space for the inbound oop handle area int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers - if (is_critical_native) { - // Critical natives may have to call out so they need a save area - // for register arguments. - int double_slots = 0; - int single_slots = 0; - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_BYTE: - case T_SHORT: - case T_CHAR: - case T_INT: single_slots++; break; - case T_ARRAY: // specific to LP64 (7145024) - case T_LONG: double_slots++; break; - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - ShouldNotReachHere(); - } - } - total_save_slots = double_slots * 2 + single_slots; - // align the save area - if (double_slots != 0) { - stack_slots = align_up(stack_slots, 2); - } - } int oop_handle_offset = stack_slots; stack_slots += total_save_slots; @@ -1369,6 +1194,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // // // FP-> | | + // | 2 slots (ra) | + // | 2 slots (fp) | // |---------------------| // | 2 slots for moves | // |---------------------| @@ -1425,11 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ nop(); // Generate stack overflow check - if (UseStackBanging) { - __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size()); - } else { - Unimplemented(); - } + __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); // Generate a new frame for the wrapper. __ enter(); @@ -1444,10 +1267,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register oop_handle_reg = x18; - if (is_critical_native) { - check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, - oop_handle_offset, oop_maps, in_regs, in_sig_bt); - } // // We immediately shuffle the arguments so that any vm call we have to // make from here on out (sync slow path, jvmti, etc.) we will have @@ -1492,22 +1311,14 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ - // This may iterate in two different directions depending on the - // kind of native it is. The reason is that for regular JNI natives - // the incoming and outgoing registers are offset upwards and for - // critical natives they are offset down. + // For JNI natives the incoming and outgoing registers are offset upwards. 
GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; tmp_vmreg.set2(x9->as_VMReg()); - if (!is_critical_native) { - for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { - arg_order.push(i); - arg_order.push(c_arg); - } - } else { - // Compute a valid move order, using tmp_vmreg to break any cycles - ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); } int temploc = -1; @@ -1515,20 +1326,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int i = arg_order.at(ai); int c_arg = arg_order.at(ai + 1); __ block_comment(err_msg("mv %d -> %d", i, c_arg)); - if (c_arg == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // This arg needs to be moved to a temporary - __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); - in_regs[i] = tmp_vmreg; - temploc = i; - continue; - } else if (i == -1) { - assert(is_critical_native, "should only be required for critical natives"); - // Read from the temporary location - assert(temploc != -1, "must be valid"); - i = temploc; - temploc = -1; - } + assert(c_arg != -1 && i != -1, "wrong order"); #ifdef ASSERT if (in_regs[i].first()->is_Register()) { assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -1543,22 +1341,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #endif /* ASSERT */ switch (in_sig_bt[i]) { case T_ARRAY: - if (is_critical_native) { - unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); - c_arg++; -#ifdef ASSERT - if (out_regs[c_arg].first()->is_Register()) { - reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; - } else if (out_regs[c_arg].first()->is_FloatRegister()) { - freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; - } -#endif - int_args++; - break; - } - // no break case T_OBJECT: - assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset); @@ -1600,7 +1383,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int c_arg = total_c_args - total_in_args; // Pre-load a static method's oop into c_rarg1. 
- if (method->is_static() && !is_critical_native) { + if (method->is_static()) { // load oop into a register __ movoop(c_rarg1, @@ -1653,13 +1436,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register obj_reg = x9; // Will contain the oop const Register lock_reg = x30; // Address of compiler lock object (BasicLock) const Register old_hdr = x30; // value of old header at unlock time - const Register tmp = lr; + const Register tmp = ra; Label slow_path_lock; Label lock_done; if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); @@ -1707,7 +1489,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bnez(swap_reg, slow_path_lock); // Slow path will re-enter here - __ bind(lock_done); } @@ -1715,9 +1496,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Finally just about ready to make the JNI call // get JNIEnv* which is first argument to native - if (!is_critical_native) { - __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); - } + __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); // Now set thread in native __ la(t1, Address(xthread, JavaThread::thread_state_offset())); @@ -1725,33 +1504,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); __ sw(t0, Address(t1)); - { - int return_type = 0; - switch (ret_type) { - case T_VOID: break; - return_type = 0; break; - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_BOOLEAN: - case T_LONG: - return_type = 1; break; - case T_ARRAY: - case T_OBJECT: - return_type = 1; break; - case T_FLOAT: - return_type = 2; break; - case T_DOUBLE: - return_type = 3; break; - default: - ShouldNotReachHere(); - } - rt_call(masm, native_func, - int_args + 2, // riscv64 passes up to 8 args in int registers - float_args, // and up to 8 float args - return_type); - } + rt_call(masm, native_func); __ bind(native_return); @@ -1759,10 +1512,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, oop_maps->add_gc_map(return_pc - start, map); // Unpack native results. - if(ret_type != T_OBJECT && ret_type != T_ARRAY) { + if (ret_type != T_OBJECT && ret_type != T_ARRAY) { __ cast_primitive_type(ret_type, x10); } + Label safepoint_in_progress, safepoint_in_progress_done; + Label after_transition; + // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -1772,29 +1528,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // didn't see any synchronization is progress, and escapes. __ mv(t0, _thread_in_native_trans); - if(os::is_MP()) { - if (UseMembar) { - __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - // Force this write out before the read below - __ membar(MacroAssembler::AnyAny); - } else { - __ la(t1, Address(xthread, JavaThread::thread_state_offset())); - __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); - __ sw(t0, Address(t1)); - - // Write serialization page so VM thread can do a pseudo remote membar. 
- // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. - __ serialize_memory(xthread, x12, t0); - } - } else { - __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - } + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); // check for safepoint operation in progress and/or pending suspend requests - Label safepoint_in_progress, safepoint_in_progress_done; { __ safepoint_poll_acquire(safepoint_in_progress); __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); @@ -1803,7 +1542,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // change thread state - Label after_transition; __ la(t1, Address(xthread, JavaThread::thread_state_offset())); __ mv(t0, _thread_in_Java); __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); @@ -1834,7 +1572,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // Simple recursive lock? - __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ beqz(t0, done); @@ -1843,7 +1580,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - // get address of the stack lock __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); // get old displaced header @@ -1884,32 +1620,26 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); } - if (!is_critical_native) { - // reset handle block - __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); - __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); - } + // reset handle block + __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); + __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); __ leave(); - if (!is_critical_native) { - // Any exception pending? - __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - __ bnez(t0, exception_pending); - } + // Any exception pending? 
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ bnez(t0, exception_pending); // We're done __ ret(); // Unexpected paths are out of line and go here - if (!is_critical_native) { - // forward the exception - __ bind(exception_pending); + // forward the exception + __ bind(exception_pending); - // and forward the exception - __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // Slow path locking & unlocking if (method->is_synchronized()) { @@ -1946,7 +1676,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ block_comment("Slow path unlock {"); __ bind(slow_path_unlock); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { save_native_result(masm, ret_type, stack_slots); } @@ -1959,7 +1689,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); #ifdef ASSERT { @@ -1973,7 +1703,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { restore_native_result(masm, ret_type, stack_slots); } __ j(unlock_done); @@ -1986,7 +1716,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bind(reguard); save_native_result(masm, ret_type, stack_slots); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); restore_native_result(masm, ret_type, stack_slots); // and continue __ j(reguard_done); @@ -2005,22 +1735,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif int32_t offset = 0; - if (!is_critical_native) { - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); - } else { - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); - } + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); __ jalr(x1, t0, offset); - __ ifence(); + // Restore any method result value restore_native_result(masm, ret_type, stack_slots); - if (is_critical_native) { - // The call above performed the transition to thread_in_Java so - // skip the transition logic above. 
- __ j(after_transition); - } - __ j(safepoint_in_progress_done); __ block_comment("} safepoint"); } @@ -2068,9 +1788,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), oop_maps); assert(nm != NULL, "create native nmethod fail!"); - if (is_critical_native) { - nm->set_lazy_critical_native(true); - } return nm; } @@ -2099,6 +1816,7 @@ void SharedRuntime::generate_deopt_blob() { OopMap* map = NULL; OopMapSet *oop_maps = new OopMapSet(); assert_cond(masm != NULL && oop_maps != NULL); + RegisterSaver reg_saver; // ------------- // This code enters when returning to a de-optimized nmethod. A return @@ -2112,7 +1830,7 @@ void SharedRuntime::generate_deopt_blob() { // In the case of an exception pending when deoptimizing, we enter // with a return address on the stack that points after the call we patched // into the exception handler. We have the following register state from, - // e.g., the forward exception stub (see stubGenerator_riscv64.cpp). + // e.g., the forward exception stub (see stubGenerator_riscv.cpp). // x10: exception oop // x9: exception handler // x13: throwing pc @@ -2136,7 +1854,7 @@ void SharedRuntime::generate_deopt_blob() { // Prolog for non exception case! // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // Normal deoptimization. Save exec mode for unpack_frames. __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved @@ -2148,7 +1866,7 @@ void SharedRuntime::generate_deopt_blob() { // return address is the pc describes what bci to do re-execute at // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved __ j(cont); @@ -2185,7 +1903,7 @@ void SharedRuntime::generate_deopt_blob() { // This is a somewhat fragile mechanism. // Save everything in sight. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // Now it is safe to overwrite any register @@ -2196,7 +1914,7 @@ void SharedRuntime::generate_deopt_blob() { // of the current frame. Then clear the field in JavaThread __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); - __ sd(x13, Address(fp, wordSize)); + __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); #ifdef ASSERT @@ -2261,7 +1979,7 @@ void SharedRuntime::generate_deopt_blob() { __ verify_oop(x10); // Overwrite the result registers with the exception results. - __ sd(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); __ bind(noException); @@ -2269,7 +1987,13 @@ void SharedRuntime::generate_deopt_blob() { // Now restore the result registers. Everything else is either dead // or captured in the vframeArray. 
- RegisterSaver::restore_result_registers(masm); + // Restore fp result register + __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + // Restore integer result register + __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + + // Pop all of the register save area off the stack + __ add(sp, sp, frame_size_in_words * wordSize); // All of the register save area has been popped of the stack. Only the // return address remains. @@ -2290,18 +2014,16 @@ void SharedRuntime::generate_deopt_blob() { __ sub(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, Address(sp, 0)); - __ ld(lr, Address(sp, wordSize)); + __ ld(ra, Address(sp, wordSize)); __ addi(sp, sp, 2 * wordSize); - // LR should now be the return address to the caller (3) + // RA should now be the return address to the caller (3) #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. - if (UseStackBanging) { - __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); - __ bang_stack_size(x9, x12); - } + __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(x9, x12); #endif // Load address of array of frame pcs into x12 __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -2333,7 +2055,7 @@ void SharedRuntime::generate_deopt_blob() { __ ld(x9, Address(x14, 0)); // Load frame size __ addi(x14, x14, wordSize); __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand - __ ld(lr, Address(x12, 0)); // Load pc + __ ld(ra, Address(x12, 0)); // Load pc __ addi(x12, x12, wordSize); __ enter(); // Save old & set new fp __ sub(sp, sp, x9); // Prolog @@ -2345,7 +2067,7 @@ void SharedRuntime::generate_deopt_blob() { __ bnez(x13, loop); // Re-push self-frame - __ ld(lr, Address(x12)); + __ ld(ra, Address(x12)); __ enter(); // Allocate a full sized register save area. We subtract 2 because @@ -2353,8 +2075,8 @@ void SharedRuntime::generate_deopt_blob() { __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); // Restore frame locals after moving the frame - __ fsd(f10, Address(sp, RegisterSaver::f10_offset_in_bytes())); - __ sd(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); // Call C code. Need thread but NOT official VM entry // crud. We cannot block on this call, no GC can happen. Call should @@ -2376,14 +2098,14 @@ void SharedRuntime::generate_deopt_blob() { // Set an oopmap for the call site // Use the same PC we used for the last java frame oop_maps->add_gc_map(the_pc - start, - new OopMap( frame_size_in_words, 0 )); + new OopMap(frame_size_in_words, 0)); // Clear fp AND pc __ reset_last_Java_frame(true); // Collect return values - __ fld(f10, Address(sp, RegisterSaver::f10_offset_in_bytes())); - __ ld(x10, Address(sp, RegisterSaver::x10_offset_in_bytes())); + __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); + __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); // Pop self-frame. __ leave(); // Epilog @@ -2417,16 +2139,15 @@ void SharedRuntime::generate_uncommon_trap_blob() { address start = __ pc(); - // Push self-frame. We get here with a return address in LR + // Push self-frame. 
We get here with a return address in RA // and sp should be 16 byte aligned // push fp and retaddr by hand __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, wordSize)); + __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp, 0)); // we don't expect an arg reg save area #ifndef PRODUCT - assert(frame::arg_reg_save_area_bytes == 0, "no" - "ame reg save area"); + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif // compiler left unloaded_class_index in j_rarg0 move to where the // runtime expects it. @@ -2460,7 +2181,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); assert_cond(oop_maps != NULL && map != NULL); - // location of rfp is known implicitly by the frame sender code + // location of fp is known implicitly by the frame sender code oop_maps->add_gc_map(__ pc() - start, map); @@ -2495,20 +2216,18 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ sub(x12, x12, 2 * wordSize); __ add(sp, sp, x12); __ ld(fp, sp, 0); - __ ld(lr, sp, wordSize); + __ ld(ra, sp, wordSize); __ addi(sp, sp, 2 * wordSize); - // LR should now be the return address to the caller (3) frame + // RA should now be the return address to the caller (3) frame #ifdef ASSERT // Compilers generate code that bang the stack by as much as the // interpreter would need. So this stack banging should never // trigger a fault. Verify that it does not on non product builds. - if (UseStackBanging) { - __ lwu(x11, Address(x14, - Deoptimization::UnrollBlock:: - total_frame_sizes_offset_in_bytes())); - __ bang_stack_size(x11, x12); - } + __ lwu(x11, Address(x14, + Deoptimization::UnrollBlock:: + total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(x11, x12); #endif // Load address of array of frame pcs into x12 (address*) @@ -2543,7 +2262,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ bind(loop); __ ld(x11, Address(x15, 0)); // Load frame size __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand - __ ld(lr, Address(x12, 0)); // Save return address + __ ld(ra, Address(x12, 0)); // Save return address __ enter(); // and old fp & set new fp __ sub(sp, sp, x11); // Prolog __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable @@ -2554,7 +2273,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ add(x12, x12, wordSize); // Bump array pointer (pcs) __ subw(x13, x13, 1); // Decrement counter __ bgtz(x13, loop); - __ ld(lr, Address(x12, 0)); // save final return address + __ ld(ra, Address(x12, 0)); // save final return address // Re-push self-frame __ enter(); // & old fp & set new fp @@ -2621,9 +2340,10 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t address call_pc = NULL; int frame_size_in_words = -1; bool cause_return = (poll_type == POLL_AT_RETURN); + RegisterSaver reg_saver; // Save Integer and Float registers. - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -2641,7 +2361,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // it later to determine if someone changed the return address for // us! 
__ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); - __ sd(x18, Address(fp, wordSize)); + __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); } // Do the call @@ -2669,7 +2389,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Exception pending - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); @@ -2679,7 +2399,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t Label no_adjust, bail; if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { // If our stashed return pc was modified by the runtime we avoid touching it - __ ld(t0, Address(fp, wordSize)); + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); __ bne(x18, t0, no_adjust); #ifdef ASSERT @@ -2699,13 +2419,13 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t #endif // Adjust return pc forward to step over the safepoint poll instruction __ add(x18, x18, NativeInstruction::instruction_size); - __ sd(x18, Address(fp, wordSize)); + __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); } __ bind(no_adjust); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); __ ret(); #ifdef ASSERT @@ -2739,6 +2459,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha assert_cond(masm != NULL); int frame_size_in_words = -1; + RegisterSaver reg_saver; OopMapSet *oop_maps = new OopMapSet(); assert_cond(oop_maps != NULL); @@ -2746,7 +2467,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha int start = __ offset(); - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); int frame_complete = __ offset(); @@ -2767,8 +2488,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha oop_maps->add_gc_map( __ offset() - start, map); - __ ifence(); - // x10 contains the address we are going to jump to assuming no exception got installed // clear last_Java_sp @@ -2780,11 +2499,11 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha // get the returned Method* __ get_vm_result_2(xmethod, xthread); - __ sd(xmethod, Address(sp, RegisterSaver::reg_offset_in_bytes(xmethod))); + __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); // x10 is where we want to jump, overwrite t0 which is saved and temporary - __ sd(x10, Address(sp, RegisterSaver::tmp0_offset_in_bytes())); - RegisterSaver::restore_live_registers(masm); + __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); + reg_saver.restore_live_registers(masm); // We are back the the original state on entry and ready to go. 
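The RegisterSaver refactoring earlier in this file replaces the fixed x10/f10/t0 offset accessors with offsets computed from the save-area layout: all 32 float registers first, one reserved slot for stack alignment, then x5..x31, with fp and ra at the top. The standalone sketch below mirrors that arithmetic under assumed HotSpot constants (wordSize = 8, two 4-byte stack slots per register, 32 integer and 32 float registers); the helper names and printed offsets are illustrative only and are not part of the patch.

    // Minimal sketch of the save-area offset arithmetic used by the new
    // RegisterSaver; constants are assumptions, not the HotSpot definitions.
    #include <cassert>
    #include <cstdio>

    static const int BytesPerInt   = 4;
    static const int wordSize      = 8;
    static const int fp_reg_count  = 32;  // f0..f31
    static const int gp_reg_count  = 32;  // x0..x31
    static const int slots_per_reg = 2;   // 8-byte registers, 4-byte slots

    static int f0_offset_in_bytes() { return 0; }

    static int reserved_slot_offset_in_bytes() {
      // all 32 float registers are saved first, then one slot pair for alignment
      return f0_offset_in_bytes() + slots_per_reg * fp_reg_count * BytesPerInt;
    }

    static int reg_offset_in_bytes(int xn) {
      // x1 (ra), x2 (sp), x3 (gp) and x4 (tp) are not saved in this area
      assert(xn > 4 && "ra, sp, gp and tp not saved");
      return reserved_slot_offset_in_bytes() + (xn - 4) * wordSize;
    }

    static int ra_offset_in_bytes() {
      // ra sits just above fp at the top of the save area
      return reserved_slot_offset_in_bytes() +
             (gp_reg_count - 3) * slots_per_reg * BytesPerInt;
    }

    int main() {
      printf("x10 (result) offset: %d bytes\n", reg_offset_in_bytes(10)); // 304
      printf("ra offset:           %d bytes\n", ra_offset_in_bytes());    // 488
      return 0;
    }

With these assumed constants the reserved alignment slot starts at byte 256 (right after f0..f31), x5 lands at 264, and ra at 488, which is consistent with the frame layout comment and with save_live_registers sizing the frame as ra_offset_in_bytes() + wordSize rounded up to 16.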
@@ -2794,7 +2513,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha __ bind(pending); - RegisterSaver::restore_live_registers(masm); + reg_saver.restore_live_registers(masm); // exception pending => remove activation and forward to exception handler @@ -2812,12 +2531,10 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha } #ifdef COMPILER2 -// This is here instead of runtime_riscv64.cpp because it uses SimpleRuntimeFrame -// //------------------------------generate_exception_blob--------------------------- // creates exception blob at the end // Using exception blob, this code is jumped from a compiled method. -// (see emit_exception_handler in riscv64.ad file) +// (see emit_exception_handler in riscv.ad file) // // Given an exception pc at a call we call into the runtime for the // handler in this method. This handler might merely restore state @@ -2863,7 +2580,7 @@ void OptoRuntime::generate_exception_blob() { // push fp and retaddr by hand // Exception pc is 'return address' for stack walker __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, wordSize)); + __ sd(ra, Address(sp, wordSize)); __ sd(fp, Address(sp)); // there are no callee save registers and we don't expect an // arg reg save area @@ -2894,7 +2611,6 @@ void OptoRuntime::generate_exception_blob() { __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); __ jalr(x1, t0, offset); - __ ifence(); // handle_exception_C is a special VM call which does not require an explicit // instruction sync afterwards. diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 26f39fa4a21..9970229c5c5 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,6 +48,9 @@ #ifdef COMPILER2 #include "opto/runtime.hpp" #endif +#if INCLUDE_ZGC +#include "gc/z/zThreadLocalData.hpp" +#endif // Declaration and definition of StubGenerator (no .hpp file). // For a more detailed description of the stub routine structure @@ -98,7 +101,7 @@ class StubGenerator: public StubCodeGenerator { // There is no return from the stub itself as any Java result // is written to result // - // we save x1 (lr) as the return PC at the base of the frame and + // we save x1 (ra) as the return PC at the base of the frame and // link x8 (fp) below it as the frame pointer installing sp (x2) // into fp. // @@ -123,80 +126,81 @@ class StubGenerator: public StubCodeGenerator { // [ return_from_Java ] <--- sp // [ argument word n ] // ... 
- // -32 [ argument word 1 ] - // -31 [ saved f27 ] <--- sp_after_call - // -30 [ saved f26 ] - // -29 [ saved f25 ] - // -28 [ saved f24 ] - // -27 [ saved f23 ] - // -26 [ saved f22 ] - // -25 [ saved f21 ] - // -24 [ saved f20 ] - // -23 [ saved f19 ] - // -22 [ saved f18 ] - // -21 [ saved f9 ] - // -20 [ saved f8 ] - // -19 [ saved x27 ] - // -18 [ saved x26 ] - // -17 [ saved x25 ] - // -16 [ saved x24 ] - // -15 [ saved x23 ] - // -14 [ saved x22 ] - // -13 [ saved x21 ] - // -12 [ saved x20 ] - // -11 [ saved x19 ] - // -10 [ saved x18 ] - // -9 [ saved x9 ] - // -8 [ call wrapper (x10) ] - // -7 [ result (x11) ] - // -6 [ result type (x12) ] - // -5 [ method (x13) ] - // -4 [ entry point (x14) ] - // -3 [ parameters (x15) ] - // -2 [ parameter size (x16) ] - // -1 [ thread (x17) ] - // 0 [ saved fp (x8) ] <--- fp == saved sp (x2) - // 1 [ saved lr (x1) ] + // -34 [ argument word 1 ] + // -33 [ saved f27 ] <--- sp_after_call + // -32 [ saved f26 ] + // -31 [ saved f25 ] + // -30 [ saved f24 ] + // -29 [ saved f23 ] + // -28 [ saved f22 ] + // -27 [ saved f21 ] + // -26 [ saved f20 ] + // -25 [ saved f19 ] + // -24 [ saved f18 ] + // -23 [ saved f9 ] + // -22 [ saved f8 ] + // -21 [ saved x27 ] + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] + // -17 [ saved x23 ] + // -16 [ saved x22 ] + // -15 [ saved x21 ] + // -14 [ saved x20 ] + // -13 [ saved x19 ] + // -12 [ saved x18 ] + // -11 [ saved x9 ] + // -10 [ call wrapper (x10) ] + // -9 [ result (x11) ] + // -8 [ result type (x12) ] + // -7 [ method (x13) ] + // -6 [ entry point (x14) ] + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] + // -2 [ saved fp (x8) ] + // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) // Call stub stack layout word offsets from fp enum call_stub_layout { - sp_after_call_off = -31, - - f27_off = -31, - f26_off = -30, - f25_off = -29, - f24_off = -28, - f23_off = -27, - f22_off = -26, - f21_off = -25, - f20_off = -24, - f19_off = -23, - f18_off = -22, - f9_off = -21, - f8_off = -20, - - x27_off = -19, - x26_off = -18, - x25_off = -17, - x24_off = -16, - x23_off = -15, - x22_off = -14, - x21_off = -13, - x20_off = -12, - x19_off = -11, - x18_off = -10, - x9_off = -9, - - call_wrapper_off = -8, - result_off = -7, - result_type_off = -6, - method_off = -5, - entry_point_off = -4, - parameters_off = -3, - parameter_size_off = -2, - thread_off = -1, - fp_f = 0, - retaddr_off = 1, + sp_after_call_off = -33, + + f27_off = -33, + f26_off = -32, + f25_off = -31, + f24_off = -30, + f23_off = -29, + f22_off = -28, + f21_off = -27, + f20_off = -26, + f19_off = -25, + f18_off = -24, + f9_off = -23, + f8_off = -22, + + x27_off = -21, + x26_off = -20, + x25_off = -19, + x24_off = -18, + x23_off = -17, + x22_off = -16, + x21_off = -15, + x20_off = -14, + x19_off = -13, + x18_off = -12, + x9_off = -11, + + call_wrapper_off = -10, + result_off = -9, + result_type_off = -8, + method_off = -7, + entry_point_off = -6, + parameters_off = -5, + parameter_size_off = -4, + thread_off = -3, + fp_f = -2, + retaddr_off = -1, }; address generate_call_stub(address& return_address) { @@ -247,7 +251,7 @@ class StubGenerator: public StubCodeGenerator { // stub code - address riscv64_entry = __ pc(); + address riscv_entry = __ pc(); // set up frame and move sp to end of save area __ enter(); @@ -446,7 +450,7 @@ class StubGenerator: public StubCodeGenerator { // Note: Usually the parameters are removed by the callee. 
In case // of an exception crossing an activation frame boundary, that is // not the case if the callee is compiled code => need to setup the - // rsp. + // sp. // // x10: exception oop @@ -497,7 +501,7 @@ class StubGenerator: public StubCodeGenerator { // x10: exception // x13: throwing pc // - // NOTE: At entry of this stub, exception-pc must be in LR !! + // NOTE: At entry of this stub, exception-pc must be in RA !! // NOTE: this is always used as a jump target within generated code // so it just needs to be generated code with no x86 prolog @@ -506,7 +510,7 @@ class StubGenerator: public StubCodeGenerator { StubCodeMark mark(this, "StubRoutines", "forward exception"); address start = __ pc(); - // Upon entry, LR points to the return address returning into + // Upon entry, RA points to the return address returning into // Java (interpreted or compiled) code; i.e., the return address // becomes the throwing pc. // @@ -530,24 +534,24 @@ class StubGenerator: public StubCodeGenerator { // call the VM to find the handler address associated with the // caller address. pass thread in x10 and caller pc (ret address) - // in x11. n.b. the caller pc is in lr, unlike x86 where it is on + // in x11. n.b. the caller pc is in ra, unlike x86 where it is on // the stack. - __ mv(c_rarg1, lr); - // lr will be trashed by the VM call so we move it to x9 + __ mv(c_rarg1, ra); + // ra will be trashed by the VM call so we move it to x9 // (callee-saved) because we also need to pass it to the handler // returned by this call. - __ mv(x9, lr); + __ mv(x9, ra); BLOCK_COMMENT("call exception_handler_for_return_address"); __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, c_rarg1); - // we should not really care that lr is no longer the callee + // we should not really care that ra is no longer the callee // address. we saved the value the handler needs in x9 so we can // just copy it to x13. however, the C2 handler will push its own // frame and then calls into the VM and the VM code asserts that // the PC for the frame above the handler belongs to a compiled - // Java method. So, we restore lr here to satisfy that assert. - __ mv(lr, x9); + // Java method. So, we restore ra here to satisfy that assert. 
+ __ mv(ra, x9); // setup x10 & x13 & clear pending exception __ mv(x13, x9); __ mv(x9, x10); @@ -583,7 +587,7 @@ class StubGenerator: public StubCodeGenerator { // Stack after saving c_rarg3: // [tos + 0]: saved c_rarg3 // [tos + 1]: saved c_rarg2 - // [tos + 2]: saved lr + // [tos + 2]: saved ra // [tos + 3]: saved t1 // [tos + 4]: saved x10 // [tos + 5]: saved t0 @@ -630,7 +634,7 @@ class StubGenerator: public StubCodeGenerator { __ pusha(); // debug(char* msg, int64_t pc, int64_t regs[]) __ mv(c_rarg0, t0); // pass address of error message - __ mv(c_rarg1, lr); // pass return address + __ mv(c_rarg1, ra); // pass return address __ mv(c_rarg2, sp); // pass address of regs on stack #ifndef PRODUCT assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); @@ -888,7 +892,7 @@ class StubGenerator: public StubCodeGenerator { const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); - Assembler::SEW sew = Assembler::elemBytes_to_sew(granularity); + Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); Label loop_forward, loop_backward, done; __ mv(dst, d); @@ -928,7 +932,6 @@ class StubGenerator: public StubCodeGenerator { void copy_memory(bool is_aligned, Register s, Register d, Register count, Register tmp, int step) { - if (UseRVV) { return copy_memory_v(s, d, count, tmp, step); } @@ -1039,7 +1042,7 @@ class StubGenerator: public StubCodeGenerator { // Scan over array at a for count oops, verifying each one. // Preserves a and count, clobbers t0 and t1. - void verify_oop_array (size_t size, Register a, Register count, Register temp) { + void verify_oop_array(size_t size, Register a, Register count, Register temp) { Label loop, end; __ mv(t1, zr); __ slli(t0, count, exact_log2(size)); @@ -1598,8 +1601,8 @@ class StubGenerator: public StubCodeGenerator { __ bgtu(temp, t0, L_failed); // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. 
- __ clear_upper_bits(src_pos, 32); - __ clear_upper_bits(dst_pos, 32); + __ zero_extend(src_pos, src_pos, 32); + __ zero_extend(dst_pos, dst_pos, 32); BLOCK_COMMENT("arraycopy_range_checks done"); } @@ -1813,8 +1816,8 @@ class StubGenerator: public StubCodeGenerator { // Get array_header_in_bytes() int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; - __ slli(t0_offset, lh, registerSize - lh_header_size_msb); // left shift to remove 24 ~ 32; - __ srli(t0_offset, t0_offset, registerSize - lh_header_size_width); // array_offset + __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; + __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset __ add(src, src, t0_offset); // src array offset __ add(dst, dst, t0_offset); // dst array offset @@ -1843,20 +1846,16 @@ class StubGenerator: public StubCodeGenerator { __ j(RuntimeAddress(byte_copy_entry)); __ BIND(L_copy_shorts); - __ slli(t0, src_pos, 1); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 1); - __ add(to, dst, t0); // dst_addr + __ shadd(from, src_pos, src, t0, 1); // src_addr + __ shadd(to, dst_pos, dst, t0, 1); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(short_copy_entry)); __ BIND(L_copy_ints); __ andi(t0, x22_elsize, 1); __ bnez(t0, L_copy_longs); - __ slli(t0, src_pos, 2); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 2); - __ add(to, dst, t0); // dst_addr + __ shadd(from, src_pos, src, t0, 2); // src_addr + __ shadd(to, dst_pos, dst, t0, 2); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(int_copy_entry)); @@ -1874,10 +1873,8 @@ class StubGenerator: public StubCodeGenerator { BLOCK_COMMENT("} assert long copy done"); } #endif - __ slli(t0, src_pos, 3); - __ add(from, src, t0); // src_addr - __ slli(t0, dst_pos, 3); - __ add(to, dst, t0); // dst_addr + __ shadd(from, src_pos, src, t0, 3); // src_addr + __ shadd(to, dst_pos, dst, t0, 3); // dst_addr __ addw(count, scratch_length, zr); // length __ j(RuntimeAddress(long_copy_entry)); @@ -1894,11 +1891,9 @@ class StubGenerator: public StubCodeGenerator { arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, t1, L_failed); - __ slli(t0, src_pos, LogBytesPerHeapOop); - __ add(from, t0, src); + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ slli(t0, dst_pos, LogBytesPerHeapOop); - __ add(to, t0, dst); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ addw(count, scratch_length, zr); // length __ BIND(L_plain_copy); @@ -1919,11 +1914,9 @@ class StubGenerator: public StubCodeGenerator { __ load_klass(dst_klass, dst); // reload // Marshal the base address arguments now, freeing registers. - __ slli(t0, src_pos, LogBytesPerHeapOop); - __ add(from, t0, src); + __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ slli(t0, dst_pos, LogBytesPerHeapOop); - __ add(to, t0, dst); + __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); __ addw(count, length, zr); // length (reloaded) const Register sco_temp = c_rarg3; // this register is free now @@ -2084,8 +2077,7 @@ class StubGenerator: public StubCodeGenerator { // Note that the total length is no less than 8 bytes. 
if (t == T_BYTE || t == T_SHORT) { __ beqz(count, L_exit1); - __ slli(tmp_reg, count, shift); - __ add(to, to, tmp_reg); // points to the end + __ shadd(to, count, to, tmp_reg, shift); // points to the end __ sd(value, Address(to, -8)); // overwrite some elements __ bind(L_exit1); __ leave(); @@ -2145,7 +2137,7 @@ class StubGenerator: public StubCodeGenerator { generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); - StubRoutines::riscv64::_zero_blocks = generate_zero_blocks(); + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); //*** jbyte // Always need aligned and unaligned versions @@ -2354,11 +2346,9 @@ class StubGenerator: public StubCodeGenerator { if (isLU) { __ add(str1, str1, cnt2); - __ slli(t0, cnt2, 1); - __ add(str2, str2, t0); + __ shadd(str2, cnt2, str2, t0, 1); } else { - __ slli(t0, cnt2, 1); - __ add(str1, str1, t0); + __ shadd(str1, cnt2, str1, t0, 1); __ add(str2, str2, cnt2); } __ xorr(tmp3, tmp1, tmp2); @@ -2387,9 +2377,10 @@ class StubGenerator: public StubCodeGenerator { __ addi(t0, cnt2, 16); __ beqz(t0, LOAD_LAST); __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) - __ slli(t0, cnt2, 1); - __ add(cnt1, cnt1, t0); // Address of 8 bytes before last 4 characters in UTF-16 string - __ add(tmp2, tmp2, cnt2); // Address of 16 bytes before last 4 characters in Latin1 string + // Address of 8 bytes before last 4 characters in UTF-16 string + __ shadd(cnt1, cnt2, cnt1, t0, 1); + // Address of 16 bytes before last 4 characters in Latin1 string + __ add(tmp2, tmp2, cnt2); __ ld(tmp4, Address(cnt1, -8)); // last 16 characters before last load compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); @@ -2521,10 +2512,10 @@ class StubGenerator: public StubCodeGenerator { } void generate_compare_long_strings() { - StubRoutines::riscv64::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); - StubRoutines::riscv64::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); - StubRoutines::riscv64::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); - StubRoutines::riscv64::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); + StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); + StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); + StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); } // x10 result @@ -2549,7 +2540,7 @@ class StubGenerator: public StubCodeGenerator { // parameters Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; // temporary registers - Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; + Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; // redefinitions Register ch1 = x28, ch2 = x29; RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); @@ -2570,9 +2561,13 @@ class StubGenerator: public StubCodeGenerator { // first is needle[0] __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); - __ mv(mask1, haystack_isL ? 
0x0101010101010101 : 0x0001000100010001); + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + __ mv(mask1, haystack_isL ? mask0101 : mask0001); __ mul(first, first, mask1); - __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + __ mv(mask2, haystack_isL ? mask7f7f : mask7fff); if (needle_isL != haystack_isL) { __ mv(tmp, ch1); } @@ -2580,7 +2575,7 @@ class StubGenerator: public StubCodeGenerator { __ blez(haystack_len, L_SMALL); if (needle_isL != haystack_isL) { - __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); } // xorr, sub, orr, notr, andr // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] @@ -2617,7 +2612,7 @@ class StubGenerator: public StubCodeGenerator { __ xorr(ch2, first, ch2); __ sub(match_mask, ch2, mask1); __ orr(ch2, ch2, mask2); - __ mv(trailing_zero, -1); // all bits set + __ mv(trailing_zeros, -1); // all bits set __ j(L_SMALL_PROCEED); __ align(OptoLoopAlignment); @@ -2625,44 +2620,42 @@ class StubGenerator: public StubCodeGenerator { __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); __ neg(haystack_len, haystack_len); if (needle_isL != haystack_isL) { - __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); + __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); } __ xorr(ch2, first, ch2); __ sub(match_mask, ch2, mask1); __ orr(ch2, ch2, mask2); - __ mv(trailing_zero, -1); // all bits set + __ mv(trailing_zeros, -1); // all bits set __ bind(L_SMALL_PROCEED); - __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. + __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. __ notr(ch2, ch2); __ andr(match_mask, match_mask, ch2); - __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check + __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check __ beqz(match_mask, NOMATCH); __ bind(L_SMALL_HAS_ZERO_LOOP); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ mv(ch2, wordSize / haystack_chr_size); __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); - __ mv(trailing_zero, wordSize / haystack_chr_size); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ mv(trailing_zeros, wordSize / haystack_chr_size); __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); __ bind(L_SMALL_CMP_LOOP); - __ slli(first, trailing_zero, needle_chr_shift); - __ add(first, needle, first); - __ slli(ch2, trailing_zero, haystack_chr_shift); - __ add(ch2, haystack, ch2); + __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); haystack_isL ? 
__ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zero, trailing_zero, 1); - __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ add(trailing_zeros, trailing_zeros, 1); + __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); __ beq(first, ch2, L_SMALL_CMP_LOOP); __ bind(L_SMALL_CMP_LOOP_NOMATCH); __ beqz(match_mask, NOMATCH); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ add(result, result, 1); __ add(haystack, haystack, haystack_chr_size); __ j(L_SMALL_HAS_ZERO_LOOP); @@ -2674,14 +2667,14 @@ class StubGenerator: public StubCodeGenerator { __ align(OptoLoopAlignment); __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); __ j(DONE); __ align(OptoLoopAlignment); __ bind(L_HAS_ZERO); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) __ sub(result, result, 1); // array index from 0, so result -= 1 @@ -2691,28 +2684,26 @@ class StubGenerator: public StubCodeGenerator { __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); // load next 8 bytes from haystack, and increase result index - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ add(result, result, 1); - __ mv(trailing_zero, wordSize / haystack_chr_size); + __ mv(trailing_zeros, wordSize / haystack_chr_size); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); // compare one char __ bind(L_CMP_LOOP); - __ slli(needle_len, trailing_zero, needle_chr_shift); - __ add(needle_len, needle, needle_len); + __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); - __ slli(ch2, trailing_zero, haystack_chr_shift); - __ add(ch2, haystack, ch2); + __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); - __ add(trailing_zero, trailing_zero, 1); // next char index + __ add(trailing_zeros, trailing_zeros, 1); // next char index __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); - __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); + __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); __ beq(needle_len, ch2, L_CMP_LOOP); __ bind(L_CMP_LOOP_NOMATCH); __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); - __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index - __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); + __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index + __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); __ add(haystack, haystack, haystack_chr_size); __ j(L_HAS_ZERO_LOOP); @@ -2723,7 +2714,7 @@ class StubGenerator: public StubCodeGenerator { __ align(OptoLoopAlignment); __ bind(L_CMP_LOOP_LAST_CMP2); - __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); + __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); __ add(result, result, 1); __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); __ j(DONE); @@ -2760,11 +2751,778 @@ class StubGenerator: public StubCodeGenerator { void generate_string_indexof_stubs() { - StubRoutines::riscv64::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); - StubRoutines::riscv64::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); - StubRoutines::riscv64::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); + StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + +#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; + const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + + return entry; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ + address generate_multiplyToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; + const Register y = x12; + const Register ylen = x13; + const Register z = x14; + const Register zlen = x15; + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + + return entry; } + address generate_squareToLen() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); + address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; + const Register z = x12; + const Register zlen = x13; + const Register y = x14; // == x + const Register ylen = x15; // == xlen + + const Register tmp1 = x16; + const Register tmp2 = x17; + const Register tmp3 = x7; + const Register tmp4 = x28; + const Register tmp5 = x29; + const Register tmp6 = x30; + const Register tmp7 = x31; + + BLOCK_COMMENT("Entry:"); + __ enter(); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); + __ ret(); + + return entry; + } +#endif + +#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, 
Ra, Rb, Rm, Rn, + Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + + RegSet _toSave; + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = c_rarg0; + Pa_base = reg; // Argument registers + if (squaring) { + Pb_base = Pa_base; + } else { + Pb_base = ++reg; + } + Pn_base = ++reg; + Rlen= ++reg; + inv = ++reg; + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. + Pb = ++reg; + Pm = ++reg; + Pn = ++reg; + + tmp0 = ++reg; // Three registers which form a + tmp1 = ++reg; // triple-precision accumuator. + tmp2 = ++reg; + + Ri = x6; // Inner and outer loop indexes. + Rj = x7; + + Rhi_ab = x28; // Product registers: low and high parts + Rlo_ab = x29; // of a*b and m*n. + Rhi_mn = x30; + Rlo_mn = x31; + + // x18 and up are callee-saved. + _toSave = RegSet::range(x18, reg) + Pm_base; + } + + private: + void save_regs() { + push_reg(_toSave, sp); + } + + void restore_regs() { + pop_reg(_toSave, sp); + } + + template + void unroll_2(Register count, T block) { + Label loop, end, odd; + beqz(count, end); + andi(t0, count, 0x1); + bnez(t0, odd); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + template + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + beqz(count, end); + andi(tmp, count, 0x1); + bnez(tmp, odd); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi(count, count, -2); + bgtz(count, loop); + bind(end); + } + + void pre1(RegisterOrConstant i) { + block_comment("pre1"); + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + if (i.is_register()) { + slli(t0, i.as_register(), LogBytesPerWord); + } else { + mv(t0, i.as_constant()); + slli(t0, t0, LogBytesPerWord); + } + + mv(Pa, Pa_base); + add(Pb, Pb_base, t0); + mv(Pm, Pm_base); + add(Pn, Pn_base, t0); + + ld(Ra, Address(Pa)); + ld(Rb, Address(Pb)); + ld(Rm, Address(Pm)); + ld(Rn, Address(Pn)); + + // Zero the m*n result. + mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + addi(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + addi(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the + // previous iteration. 
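(The MACC that step() and acc() open-code here is a 64x64->128-bit multiply folded into the triple-precision accumulator tmp0:tmp1:tmp2. A rough, self-contained C++ model of one such step is sketched below; it is illustrative only, relies on the GCC/Clang unsigned __int128 extension, and is not the code the stub emits.)

  #include <cstdint>

  // Sketch: (tmp2:tmp1:tmp0) += x * y, i.e. the effect of one mulhu/mul pair
  // followed by acc()'s cad/cadc/adc carry chain.
  static inline void macc(uint64_t &tmp0, uint64_t &tmp1, uint64_t &tmp2,
                          uint64_t x, uint64_t y) {
    unsigned __int128 p  = (unsigned __int128)x * y;                  // {Rhi, Rlo}
    unsigned __int128 lo = (unsigned __int128)tmp0 + (uint64_t)p;     // cad: low word + carry out
    unsigned __int128 hi = (unsigned __int128)tmp1 + (uint64_t)(p >> 64)
                           + (uint64_t)(lo >> 64);                    // cadc: high word + carry
    tmp0 = (uint64_t)lo;
    tmp1 = (uint64_t)hi;
    tmp2 += (uint64_t)(hi >> 64);                                     // adc into the top word
  }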
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); + stop("broken Montgomery multiply"); + bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. So, rather than do a mul and an cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp0, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + sub(Rj, i, len); + // Rj == i-len + + // Ra as temp register + slli(Ra, Rj, LogBytesPerWord); + add(Pa, Pa_base, Ra); + add(Pm, Pm_base, Ra); + slli(Ra, len, LogBytesPerWord); + add(Pb, Pb_base, Ra); + add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + add(Pa, Pa, wordSize); + ld(Ra, Address(Pa)); + add(Pb, Pb, -wordSize); + ld(Rb, Address(Pb)); + add(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + add(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + + mv(Rhi_mn, zr); + mv(Rlo_mn, zr); + } + + void post2(Register i, Register len) { + block_comment("post2"); + sub(Rj, i, len); + + cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register + slli(Rj, Rj, LogBytesPerWord); + add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // A carry in tmp0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. + void normalize(Register len) { + block_comment("normalize"); + // while (tmp0) + // tmp0 = sub(Pm_base, Pn_base, tmp0, len); + Label loop, post, again; + Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now + beqz(tmp0, post); { + bind(again); { + mv(i, zr); + mv(cnt, len); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + li(t0, 1); // set carry flag, i.e. 
no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); + slli(Rn, i, LogBytesPerWord); // Rn as temp register + add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); + add(Rm, Pm_base, Rn); + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); + sub(cnt, cnt, 1); + } bnez(cnt, loop); + addi(tmp0, tmp0, -1); + add(tmp0, tmp0, t0); + } bnez(tmp0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. + // Increments d to end of copied memory + // Destroys tmp1, tmp2 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + + slli(tmp1, len, LogBytesPerWord); + add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); + sub(s, d, tmp1); + } + // [63...0] -> [31...0][63...32] + void reverse1(Register d, Register s, Register tmp) { + addi(s, s, -wordSize); + ld(tmp, Address(s)); + ror_imm(tmp, tmp, 32, t0); + sd(tmp, Address(d)); + addi(d, d, wordSize); + } + + void step_squaring() { + // An extra ACC + step(); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } + + void last_squaring(Register i) { + Label dont; + // if ((i & 1) == 0) { + andi(t0, i, 0x1); + bnez(t0, dont); { + // MACC(Ra, Rb, tmp0, tmp1, tmp2); + // Ra = *++Pa; + // Rb = *--Pb; + mulhu(Rhi_ab, Ra, Rb); + mul(Rlo_ab, Ra, Rb); + acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + } bind(dont); + } + + void extra_step_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // Rm = *++Pm; + // Rn = *--Pn; + mulhu(Rhi_mn, Rm, Rn); + mul(Rlo_mn, Rm, Rn); + addi(Pm, Pm, wordSize); + ld(Rm, Address(Pm)); + addi(Pn, Pn, -wordSize); + ld(Rn, Address(Pn)); + } + + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + + // *Pm = Rm = tmp0 * inv; + mul(Rm, tmp0, inv); + sd(Rm, Address(Pm)); + + // MACC(Rm, Rn, tmp0, tmp1, tmp2); + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; + mulhu(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); + { + mul(Rlo_mn, Rm, Rn); + add(Rlo_mn, tmp0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + // We have very carefully set things up so that + // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff + // tmp0 != 0. So, rather than do a mul and a cad we just set + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); + // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); + adc(tmp1, tmp2, zr, t0); + mv(tmp2, zr); + } + + // use t0 as carry + void acc(Register Rhi, Register Rlo, + Register tmp0, Register tmp1, Register tmp2) { + cad(tmp0, tmp0, Rlo, t0); + cadc(tmp1, tmp1, Rhi, t0); + adc(tmp2, tmp2, zr, t0); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
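(Stated as a contract sketch rather than anything normative: with Rlen 64-bit words and R = 2^(64*Rlen), the generated routine produces a result congruent to a * b * R^-1 (mod n); each outer-loop column is closed by m[i] = tmp0 * inv, chosen so that m[i]*n[0] + tmp0 == 0 (mod 2^64), exactly as the post1()/post1_squaring() comments above note.)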
+ * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements b + * c_rarg2 - int array elements n (the modulus) + * c_rarg3 - int length + * c_rarg4 - int inv + * c_rarg5 - int array elements m (the result) + * + * Inputs for squaring: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, Ri, Rj); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); + li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); + bind(ok); + } +#endif + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + mv(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + addw(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + slliw(Rj, Rlen, 1); + subw(Rj, Rj, Ri); + subw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addw(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + bind(nothing); + ret(); + + return entry; + } + + /** + * + * Arguments: + * + * Inputs: + * c_rarg0 - int array elements a + * c_rarg1 - int array elements n (the modulus) + * c_rarg2 - int length + * c_rarg3 - int inv + * c_rarg4 - int array elements m (the result) + * + */ + address generate_square() { + Label argh; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + // Make room. + li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); + andi(sp, Ra, -2 * wordSize); + + srliw(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. 
+ reverse(Ra, Pa_base, Rlen, Ri, Rj); + reverse(Ra, Pn_base, Rlen, Ri, Rj); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. + save_regs(); + + mv(Pm_base, Ra); + + mv(tmp0, zr); + mv(tmp1, zr); + mv(tmp2, zr); + + block_comment("for (int i = 0; i < len; i++) {"); + mv(Ri, zr); { + Label loop, end; + bind(loop); + bge(Ri, Rlen, end); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + addi(Rj, Ri, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + srliw(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + addi(Ri, Ri, 1); + blt(Ri, Rlen, loop); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; + bind(loop); + slli(t0, Rlen, 1); + bge(Ri, t0, end); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + slli(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + srliw(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi(Ri, Ri, 1); + slli(t0, Rlen, 1); + blt(Ri, t0, loop); + + bind(end); + block_comment("} // i"); + } + + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, Ri, Rj); + + leave(); + ret(); + + return entry; + } + }; +#endif // COMPILER2 + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this @@ -2792,7 +3550,7 @@ class StubGenerator: public StubCodeGenerator { // Note that we only have to preserve callee-saved registers since // the compilers are responsible for supplying a continuation point // if they expect all registers to be preserved. - // n.b. riscv64 asserts that frame::arg_reg_save_area_bytes == 0 + // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 assert_cond(runtime_entry != NULL); enum layout { fp_off = 0, @@ -2817,12 +3575,12 @@ class StubGenerator: public StubCodeGenerator { // thread-local storage and also sets up last_Java_sp slightly // differently than the real call_VM - __ enter(); // Save FP and LR before call + __ enter(); // Save FP and RA before call assert(is_even(framesize / 2), "sp not 16-byte aligned"); - // lr and fp are already in place - __ addi(sp, fp, 0 - (((unsigned)framesize - 4) << LogBytesPerInt)); // prolog + // ra and fp are already in place + __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog int frame_complete = __ pc() - start; @@ -2851,7 +3609,6 @@ class StubGenerator: public StubCodeGenerator { oop_maps->add_gc_map(the_pc - start, map); __ reset_last_Java_frame(true); - __ ifence(); __ leave(); @@ -2936,11 +3693,37 @@ class StubGenerator: public StubCodeGenerator { // arraycopy stubs used by compilers generate_arraycopy_stubs(); +#ifdef COMPILER2 + if (UseMulAddIntrinsic) { + StubRoutines::_mulAdd = generate_mulAdd(); + } + + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + + if (UseSquareToLenIntrinsic) { + StubRoutines::_squareToLen = generate_squareToLen(); + } + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } +#endif + generate_compare_long_strings(); generate_string_indexof_stubs(); - StubRoutines::riscv64::set_completed(); + StubRoutines::riscv::set_completed(); } public: diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp index 53edd653aa9..9202d9ec4b0 100644 --- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,26 +34,24 @@ // Implementation of the platform-specific part of StubRoutines - for // a description of how to extend it, see the stubRoutines.hpp file. 
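(For the mulAdd stub registered above: the Java fallback it replaces, java.math.BigInteger.implMulAdd, adds in[0..len) * k into a window of out and returns the carry. A simplified little-endian C++ sketch of that contract follows; the real routine walks the int arrays from their big-endian tail and applies the offset argument, both omitted here, and none of these names are HotSpot APIs.)

  #include <cstdint>
  #include <cstddef>

  // Sketch only: out[0..len) += in[0..len) * k over 32-bit limbs; return the final carry.
  static uint32_t mul_add_sketch(uint32_t *out, const uint32_t *in,
                                 size_t len, uint32_t k) {
    uint64_t carry = 0;
    for (size_t i = 0; i < len; i++) {
      uint64_t p = (uint64_t)in[i] * k + out[i] + carry;  // cannot overflow 64 bits
      out[i] = (uint32_t)p;
      carry  = p >> 32;
    }
    return (uint32_t)carry;
  }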
-address StubRoutines::riscv64::_get_previous_sp_entry = NULL; +address StubRoutines::riscv::_get_previous_sp_entry = NULL; -address StubRoutines::riscv64::_f2i_fixup = NULL; -address StubRoutines::riscv64::_f2l_fixup = NULL; -address StubRoutines::riscv64::_d2i_fixup = NULL; -address StubRoutines::riscv64::_d2l_fixup = NULL; -address StubRoutines::riscv64::_float_sign_mask = NULL; -address StubRoutines::riscv64::_float_sign_flip = NULL; -address StubRoutines::riscv64::_double_sign_mask = NULL; -address StubRoutines::riscv64::_double_sign_flip = NULL; -address StubRoutines::riscv64::_zero_blocks = NULL; -address StubRoutines::riscv64::_has_negatives = NULL; -address StubRoutines::riscv64::_has_negatives_long = NULL; -address StubRoutines::riscv64::_compare_long_string_LL = NULL; -address StubRoutines::riscv64::_compare_long_string_UU = NULL; -address StubRoutines::riscv64::_compare_long_string_LU = NULL; -address StubRoutines::riscv64::_compare_long_string_UL = NULL; -address StubRoutines::riscv64::_string_indexof_linear_ll = NULL; -address StubRoutines::riscv64::_string_indexof_linear_uu = NULL; -address StubRoutines::riscv64::_string_indexof_linear_ul = NULL; -address StubRoutines::riscv64::_large_byte_array_inflate = NULL; -address StubRoutines::riscv64::_method_entry_barrier = NULL; -bool StubRoutines::riscv64::_completed = false; +address StubRoutines::riscv::_f2i_fixup = NULL; +address StubRoutines::riscv::_f2l_fixup = NULL; +address StubRoutines::riscv::_d2i_fixup = NULL; +address StubRoutines::riscv::_d2l_fixup = NULL; +address StubRoutines::riscv::_float_sign_mask = NULL; +address StubRoutines::riscv::_float_sign_flip = NULL; +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address StubRoutines::riscv::_compare_long_string_LU = NULL; +address StubRoutines::riscv::_compare_long_string_UL = NULL; +address StubRoutines::riscv::_string_indexof_linear_ll = NULL; +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; + +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp index 37d3523adb5..0c9445e18a7 100644 --- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ // definition. See stubRoutines.hpp for a description on how to // extend it. 
-static bool returns_to_call_stub(address return_pc) { +static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } @@ -40,7 +40,7 @@ enum platform_dependent_constants { code_size2 = 28000 // simply increase if too small (assembler will crash if too small) }; -class riscv64 { +class riscv { friend class StubGenerator; private: @@ -58,8 +58,6 @@ class riscv64 { static address _zero_blocks; - static address _has_negatives; - static address _has_negatives_long; static address _compare_long_string_LL; static address _compare_long_string_LU; static address _compare_long_string_UL; @@ -69,54 +67,43 @@ class riscv64 { static address _string_indexof_linear_ul; static address _large_byte_array_inflate; - static address _method_entry_barrier; - static bool _completed; public: - static address get_previous_sp_entry() - { + static address get_previous_sp_entry() { return _get_previous_sp_entry; } - static address f2i_fixup() - { + static address f2i_fixup() { return _f2i_fixup; } - static address f2l_fixup() - { + static address f2l_fixup() { return _f2l_fixup; } - static address d2i_fixup() - { + static address d2i_fixup() { return _d2i_fixup; } - static address d2l_fixup() - { + static address d2l_fixup() { return _d2l_fixup; } - static address float_sign_mask() - { + static address float_sign_mask() { return _float_sign_mask; } - static address float_sign_flip() - { + static address float_sign_flip() { return _float_sign_flip; } - static address double_sign_mask() - { + static address double_sign_mask() { return _double_sign_mask; } - static address double_sign_flip() - { + static address double_sign_flip() { return _double_sign_flip; } @@ -124,14 +111,6 @@ class riscv64 { return _zero_blocks; } - static address has_negatives() { - return _has_negatives; - } - - static address has_negatives_long() { - return _has_negatives_long; - } - static address compare_long_string_LL() { return _compare_long_string_LL; } @@ -164,10 +143,6 @@ class riscv64 { return _large_byte_array_inflate; } - static address method_entry_barrier() { - return _method_entry_barrier; - } - static bool complete() { return _completed; } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index 33f812e67ed..e639fa7e12f 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -80,7 +80,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { __ addi(sp, c_rarg3, -18 * wordSize); __ addi(sp, sp, -2 * wordSize); - __ sd(lr, Address(sp, 0)); + __ sd(ra, Address(sp, 0)); __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -101,8 +101,8 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() { // bcp (NULL) // ... 
- // Restore LR - __ ld(lr, Address(sp, 0)); + // Restore ra + __ ld(ra, Address(sp, 0)); __ addi(sp, sp , 2 * wordSize); // Do FP first so we can use c_rarg3 as temp @@ -159,11 +159,11 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M // stack: // [ arg ] <-- esp // [ arg ] - // retaddr in lr + // retaddr in ra address fn = NULL; address entry_point = NULL; - Register continuation = lr; + Register continuation = ra; switch (kind) { case Interpreter::java_lang_math_abs: entry_point = __ pc(); @@ -181,7 +181,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dsin() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); @@ -195,7 +195,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dcos() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); @@ -209,7 +209,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dtan() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); @@ -223,7 +223,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dlog() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); @@ -237,7 +237,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dlog10() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); @@ -251,7 +251,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M entry_point = __ pc(); __ fld(f10, Address(esp)); __ mv(sp, x30); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; // The first callee-saved register if (StubRoutines::dexp() == NULL) { fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); @@ -263,7 +263,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M break; case Interpreter::java_lang_math_pow : entry_point = __ pc(); - __ mv(x9, lr); + __ mv(x9, ra); continuation = x9; __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); __ fld(f11, Address(esp)); @@ -366,7 +366,7 @@ address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { // setup parameters // convention: expect aberrant index in register x11 - __ zero_ext(c_rarg2, x11, 32); + __ zero_extend(c_rarg2, x11, 32); // convention: expect array in register x13 __ mv(c_rarg1, x13); __ call_VM(noreg, @@ -455,8 +455,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); - __ slli(t0, x11, 3); - __ add(esp, esp, t0); + __ 
shadd(esp, x11, esp, t0, 3); // Restore machine SP __ ld(t0, Address(xmethod, Method::const_offset())); @@ -557,9 +556,10 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, // // xmethod: method // -void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, - Label* profile_method, - Label* profile_method_continue) { +void TemplateInterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { Label done; // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. if (TieredCompilation) { @@ -579,19 +579,19 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, __ bind(no_mdo); // Increment counter in MethodCounters const Address invocation_counter(t1, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); __ get_method_counters(xmethod, t1, done); const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); __ bind(done); } else { // not TieredCompilation const Address backedge_counter(t1, - MethodCounters::backedge_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); const Address invocation_counter(t1, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); __ get_method_counters(xmethod, t1, done); @@ -627,7 +627,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, __ ld(t1, Address(xmethod, Method::method_counters_offset())); __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); __ bltu(x10, t1, done); - __ j(*overflow); // offset is too large so we have to use j instead of bgeu here + __ j(*overflow); } __ bind(done); } @@ -685,8 +685,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { // locals + overhead, in bytes __ mv(x10, overhead_size); - __ slli(t0, x13, Interpreter::logStackElementSize); - __ add(x10, x10, t0); // 2 slots per parameter. + __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); __ ld(t0, stack_limit); @@ -780,7 +779,7 @@ void TemplateInterpreterGenerator::lock_method() { // interpreted methods and for native methods hence the shared code. // // Args: -// lr: return address +// ra: return address // xmethod: Method* // xlocals: pointer to locals // xcpool: cp cache @@ -815,9 +814,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(t2, xmethod); - __ sd(zr, Address(sp, 5 * wordSize)); - __ sd(t2, Address(sp, 4 * wordSize)); +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC) { + __ load_mirror(x28, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(x28, Address(sp, 4 * wordSize)); + } else +#endif + { + __ load_mirror(t2, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(t2, Address(sp, 4 * wordSize)); + } __ ld(xcpool, Address(xmethod, Method::const_offset())); __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); @@ -825,9 +833,9 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ sd(xcpool, Address(sp, 3 * wordSize)); __ sd(xlocals, Address(sp, 2 * wordSize)); - __ sd(lr, Address(sp, 11 * wordSize)); + __ sd(ra, Address(sp, 11 * wordSize)); __ sd(fp, Address(sp, 10 * wordSize)); - __ la(fp, Address(sp, 10 * wordSize)); + __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp // set sender sp // leave last_sp as null @@ -882,7 +890,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { // xmethod: Method* // x30: senderSP must preserve for slow path, set SP to it on fast path - // LR is live. It must be saved around calls. + // ra is live. It must be saved around calls. address entry = __ pc(); @@ -948,14 +956,12 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // Bang each page in the shadow zone. We can't assume it's been done for // an interpreter frame with greater than a page of locals, so each page // needs to be checked. Only true for non-native. - if (UseStackBanging) { - const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); - const int start_page = native_call ? n_shadow_pages : 1; - const int page_size = os::vm_page_size(); - for (int pages = start_page; pages <= n_shadow_pages; pages++) { - __ sub(t1, sp, pages * page_size); - __ sd(zr, Address(t1)); - } + const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); + const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); + for (int pages = start_page; pages <= n_shadow_pages ; pages++) { + __ sub(t0, sp, pages * page_size); + __ sd(zr, Address(t0)); } } @@ -964,7 +970,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // native method than the typical interpreter frame setup. address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // x11: Method* // x30: sender sp @@ -991,8 +997,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // for natives the size of locals is zero // compute beginning of parameters (xlocals) - __ slli(xlocals, x12, 3); - __ add(xlocals, esp, xlocals); + __ shadd(xlocals, x12, esp, xlocals, 3); __ addi(xlocals, xlocals, -wordSize); // Pull SP back to minimum size: this avoids holes in the stack @@ -1160,7 +1165,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // Call the native method. 
__ jalr(x28); __ bind(native_return); - __ ifence(); __ get_method(xmethod); // result potentially in x10 or f10 @@ -1182,18 +1186,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mv(t0, _thread_in_native_trans); __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); - if (os::is_MP()) { - if (UseMembar) { - // Force this write out before the read below - __ membar(MacroAssembler::AnyAny); - } else { - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. - __ serialize_memory(xthread, t0, t1); - } - } + // Force this write out before the read below + __ membar(MacroAssembler::AnyAny); // check for safepoint operation in progress and/or pending suspend requests { @@ -1211,7 +1205,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mv(c_rarg0, xthread); __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); __ jalr(t1); - __ ifence(); __ get_method(xmethod); __ reinit_heapbase(); __ bind(Continue); @@ -1387,8 +1380,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { generate_stack_overflow_check(); // compute beginning of parameters (xlocals) - __ slli(t1, x12, 3); - __ add(xlocals, esp, t1); + __ shadd(xlocals, x12, esp, t1, 3); __ add(xlocals, xlocals, -wordSize); // Make room for additional locals @@ -1499,7 +1491,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { __ set_method_data_pointer_for_bcp(); // don't think we need this __ get_method(x11); - __ jal(profile_method_continue); + __ j(profile_method_continue); } // Handle overflow of counter and compile method __ bind(invocation_counter_overflow); @@ -1706,23 +1698,23 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // following registers set up: // // x10: exception - // lr: return address/pc that threw exception + // ra: return address/pc that threw exception // sp: expression stack of caller // fp: fp of caller - // FIXME: There's no point saving LR here because VM calls don't trash it + // FIXME: There's no point saving ra here because VM calls don't trash it __ sub(sp, sp, 2 * wordSize); __ sd(x10, Address(sp, 0)); // save exception - __ sd(lr, Address(sp, wordSize)); // save return address + __ sd(ra, Address(sp, wordSize)); // save return address __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), - xthread, lr); + xthread, ra); __ mv(x11, x10); // save exception handler __ ld(x10, Address(sp, 0)); // restore exception - __ ld(lr, Address(sp, wordSize)); // restore return address + __ ld(ra, Address(sp, wordSize)); // restore return address __ add(sp, sp, 2 * wordSize); // We might be returning to a deopt handler that expects x13 to // contain the exception pc - __ mv(x13, lr); + __ mv(x13, ra); // Note that an "issuing PC" is actually the next PC after the call __ jr(x11); // jump to exception // handler of caller @@ -1789,14 +1781,14 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); - __ push_reg(lr); + __ push_reg(ra); __ push(state); __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); __ mv(c_rarg2, x10); // Pass itos 
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); __ pop(state); - __ pop_reg(lr); + __ pop_reg(ra); __ ret(); // return from result handler return entry; diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index bdfd540c878..84b1afc7dc6 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,7 +49,7 @@ // Platform-dependent initialization void TemplateTable::pd_initialize() { - // No riscv64 specific initialization + // No RISC-V specific initialization } // Address computation: local variables @@ -76,16 +76,14 @@ static inline Address aaddress(int n) { static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { assert_cond(_masm != NULL); - _masm->slli(temp, r, 3); - _masm->add(temp, xlocals, temp); + _masm->shadd(temp, r, xlocals, temp, 3); return Address(temp, 0); } static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { assert_cond(_masm != NULL); - _masm->slli(temp, r, 3); - _masm->add(temp, xlocals, temp); + _masm->shadd(temp, r, xlocals, temp, 3); return Address(temp, Interpreter::local_offset_in_bytes(1));; } @@ -301,7 +299,7 @@ void TemplateTable::sipush() { transition(vtos, itos); __ load_unsigned_short(x10, at_bcp(1)); - __ grevw(x10, x10); + __ revb_w_w(x10, x10); __ sraiw(x10, x10, 16); } @@ -352,8 +350,7 @@ void TemplateTable::ldc(bool wide) __ bne(x13, t1, notFloat); // ftos - __ slli(x11, x11, 3); - __ add(x11, x12, x11); + __ shadd(x11, x11, x12, x11, 3); __ flw(f10, Address(x11, base_offset)); __ push_f(f10); __ j(Done); @@ -364,8 +361,7 @@ void TemplateTable::ldc(bool wide) __ bne(x13, t1, notInt); // itos - __ slli(x11, x11, 3); - __ add(x11, x12, x11); + __ shadd(x11, x11, x12, x11, 3); __ lw(x10, Address(x11, base_offset)); __ push_i(x10); __ j(Done); @@ -413,7 +409,7 @@ void TemplateTable::fast_aldc(bool wide) int32_t offset = 0; __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); __ ld(tmp, Address(rarg, offset)); - __ oop_bne(result, tmp, notNull); + __ bne(result, tmp, notNull); __ mv(result, zr); // NULL object reference __ bind(notNull); } @@ -441,8 +437,7 @@ void TemplateTable::ldc2_w() __ bne(x12, t1, notDouble); // dtos - __ slli(x12, x10, 3); - __ add(x12, x11, x12); + __ shadd(x12, x10, x11, x12, 3); __ fld(f10, Address(x12, base_offset)); __ push_d(f10); __ j(Done); @@ -452,8 +447,7 @@ void TemplateTable::ldc2_w() __ bne(x12, t1, notLong); // ltos - __ slli(x10, x10, 3); - __ add(x10, x11, x10); + __ shadd(x10, x10, x11, x10, 3); __ ld(x10, Address(x10, base_offset)); __ push_l(x10); __ j(Done); @@ -461,7 +455,6 @@ void TemplateTable::ldc2_w() __ bind(notLong); condy_helper(Done); __ bind(Done); - } void TemplateTable::condy_helper(Label& Done) @@ -487,8 +480,8 @@ void TemplateTable::condy_helper(Label& Done) __ add(off, obj, off); const Address field(off, 
0); // base + R---->base + offset - __ slli(flags, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 switch (bytecode()) { case Bytecodes::_ldc: // fall through @@ -682,7 +675,7 @@ void TemplateTable::aload() void TemplateTable::locals_index_wide(Register reg) { __ lhu(reg, at_bcp(2)); - __ grevhu(reg, reg); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend __ neg(reg, reg); } @@ -696,7 +689,7 @@ void TemplateTable::wide_lload() { transition(vtos, ltos); __ lhu(x11, at_bcp(2)); - __ grevhu(x11, x11); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend __ slli(x11, x11, LogBytesPerWord); __ sub(x11, xlocals, x11); __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); @@ -713,7 +706,7 @@ void TemplateTable::wide_dload() { transition(vtos, dtos); __ lhu(x11, at_bcp(2)); - __ grevhu(x11, x11); // reverse bytes in half-word and zero-extend + __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend __ slli(x11, x11, LogBytesPerWord); __ sub(x11, xlocals, x11); __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); @@ -757,8 +750,7 @@ void TemplateTable::iaload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); - __ slli(t0, x11, 2); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); __ addw(x10, x10, zr); // signed extended } @@ -772,8 +764,7 @@ void TemplateTable::laload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); - __ slli(t0, x11, 3); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -786,8 +777,7 @@ void TemplateTable::faload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); - __ slli(t0, x11, 2); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 2); __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -800,8 +790,7 @@ void TemplateTable::daload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); - __ slli(t0, x11, 3); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 3); __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -814,8 +803,7 @@ void TemplateTable::aaload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - __ slli(t0, x11, LogBytesPerHeapOop); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); do_oop_load(_masm, Address(x10), x10, @@ -831,8 +819,7 @@ void TemplateTable::baload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); - __ slli(t0, x11, 0); - __ 
add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 0); __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -845,8 +832,7 @@ void TemplateTable::caload() // x11: index index_check(x10, x11); // leaves index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); - __ slli(t0, x11, 1); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -863,8 +849,7 @@ void TemplateTable::fast_icaload() // x11: index index_check(x10, x11); // leaves index in x11, kills t0 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 - __ slli(t0, x11, 1); - __ add(x10, x10, t0); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -877,8 +862,7 @@ void TemplateTable::saload() // x11: index index_check(x10, x11); // leaves index in x11, kills t0 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); - __ slli(t0, x11, 1); - __ add(x10, t0, x10); + __ shadd(x10, x11, x10, t0, 1); __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); } @@ -1065,8 +1049,7 @@ void TemplateTable::iastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); - __ slli(t0, x11, 2); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1079,8 +1062,7 @@ void TemplateTable::lastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); - __ slli(t0, x11, 3); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1093,8 +1075,7 @@ void TemplateTable::fastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); - __ slli(t0, x11, 2); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 2); __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); } @@ -1107,8 +1088,7 @@ void TemplateTable::dastore() { // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); - __ slli(t0, x11, 3); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 3); __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); } @@ -1122,8 +1102,7 @@ void TemplateTable::aastore() { index_check(x13, x12); // kills x11 __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - __ slli(x14, x14, LogBytesPerHeapOop); - __ add(x14, x13, x14); + __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); Address element_address(x14, 0); @@ -1204,8 +1183,7 @@ void TemplateTable::castore() // x13: array index_check(x13, x11); // prefer index in x11 __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); - __ slli(t0, x11, 1); - __ add(t0, x13, t0); + __ shadd(t0, x11, x13, t0, 1); __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); } @@ -1573,8 +1551,8 @@ void TemplateTable::wide_iinc() { transition(vtos, vtos); __ lwu(x11, at_bcp(2)); // get constant and index - __ grev16wu(x11, x11); // reverse bytes in half-word (32bit) and zero-extend - __ zero_ext(x12, x11, 48); + __ revb_h_w_u(x11, x11); // 
reverse bytes in half-word (32bit) and zero-extend + __ zero_extend(x12, x11, 16); __ neg(x12, x12); __ slli(x11, x11, 32); __ srai(x11, x11, 48); @@ -1633,7 +1611,7 @@ void TemplateTable::convert() // Conversion switch (bytecode()) { case Bytecodes::_i2l: - __ sign_ext(x10, x10, registerSize - 32); + __ sign_extend(x10, x10, 32); break; case Bytecodes::_i2f: __ fcvt_s_w(f10, x10); @@ -1642,13 +1620,13 @@ void TemplateTable::convert() __ fcvt_d_w(f10, x10); break; case Bytecodes::_i2b: - __ sign_ext(x10, x10, registerSize - 8); + __ sign_extend(x10, x10, 8); break; case Bytecodes::_i2c: - __ zero_ext(x10, x10, registerSize - 16); + __ zero_extend(x10, x10, 16); break; case Bytecodes::_i2s: - __ sign_ext(x10, x10, registerSize - 16); + __ sign_extend(x10, x10, 16); break; case Bytecodes::_l2i: __ addw(x10, x10, zr); @@ -1733,10 +1711,10 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) // load branch displacement if (!is_wide) { __ lhu(x12, at_bcp(1)); - __ grevh(x12, x12); // reverse bytes in half-word and sign-extend + __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend } else { __ lwu(x12, at_bcp(1)); - __ grevw(x12, x12); // reverse bytes in word and sign-extend + __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend } // Handle all the JSR stuff here, then exit. @@ -1800,7 +1778,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ beqz(x11, no_mdo); // Increment the MDO backedge counter const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); + in_bytes(InvocationCounter::counter_offset())); const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, x10, t0, false, @@ -1817,9 +1795,9 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) } else { // not TieredCompilation // increment counter __ ld(t1, Address(xmethod, Method::method_counters_offset())); - __ lwu(x10, Address(t1, be_offset)); // load backedge counter + __ lwu(x10, Address(t1, be_offset)); // load backedge counter __ addw(t0, x10, InvocationCounter::count_increment); // increment counter - __ sw(t0, Address(t1, be_offset)); // store counter + __ sw(t0, Address(t1, be_offset)); // store counter __ lwu(x10, Address(t1, inv_offset)); // load invocation counter __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits @@ -1860,7 +1838,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) } __ bind(dispatch); } - // Pre-load the next target bytecode into t0 __ load_unsigned_byte(t0, Address(xbcp, 0)); @@ -1870,7 +1847,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ dispatch_only(vtos, /*generate_poll*/true); if (UseLoopCounter) { - if (ProfileInterpreter) { + if (ProfileInterpreter && !TieredCompilation) { // Out-of-line code to allocate method data oop. 
__ bind(profile_method); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); @@ -1916,7 +1893,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) // remove activation // get sender esp __ ld(esp, - Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // remove frame anchor __ leave(); // Ensure compiled code always sees stack at proper alignment @@ -1927,7 +1904,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ jr(t0); } } - } void TemplateTable::if_0cmp(Condition cc) @@ -2023,9 +1999,9 @@ void TemplateTable::if_acmp(Condition cc) __ pop_ptr(x11); if (cc == equal) { - __ oop_bne(x11, x10, not_taken); + __ bne(x11, x10, not_taken); } else if (cc == not_equal) { - __ oop_beq(x11, x10, not_taken); + __ beq(x11, x10, not_taken); } branch(false, false); __ bind(not_taken); @@ -2069,20 +2045,19 @@ void TemplateTable::tableswitch() { // load lo & hi __ lwu(x12, Address(x11, BytesPerInt)); __ lwu(x13, Address(x11, 2 * BytesPerInt)); - __ grevw(x12, x12); // reverse bytes in word (32bit) and sign-extend - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend // check against lo & hi __ blt(x10, x12, default_case); __ bgt(x10, x13, default_case); // lookup dispatch offset __ subw(x10, x10, x12); - __ slli(t0, x10, 2); - __ add(x13, x11, t0); + __ shadd(x13, x10, x11, t0, 2); __ lwu(x13, Address(x13, 3 * BytesPerInt)); __ profile_switch_case(x10, x11, x12); // continue execution __ bind(continue_execution); - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend __ add(xbcp, xbcp, x13); __ load_unsigned_byte(t0, Address(xbcp)); __ dispatch_only(vtos, /*generate_poll*/true); @@ -2102,7 +2077,7 @@ void TemplateTable::fast_linearswitch() { transition(itos, vtos); Label loop_entry, loop, found, continue_execution; // bswap x10 so we can avoid bswapping the table entries - __ grevw(x10, x10); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend // align xbcp __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of // this instruction (change offsets @@ -2110,12 +2085,11 @@ void TemplateTable::fast_linearswitch() { __ andi(x9, x9, -BytesPerInt); // set counter __ lwu(x11, Address(x9, BytesPerInt)); - __ grev32(x11, x11); + __ revb_w(x11, x11); __ j(loop_entry); // table search __ bind(loop); - __ slli(t0, x11, 3); - __ add(t0, x9, t0); + __ shadd(t0, x11, x9, t0, 3); __ lw(t0, Address(t0, 2 * BytesPerInt)); __ beq(x10, t0, found); __ bind(loop_entry); @@ -2127,13 +2101,12 @@ void TemplateTable::fast_linearswitch() { __ j(continue_execution); // entry found -> get offset __ bind(found); - __ slli(t0, x11, 3); - __ add(t0, x9, t0); + __ shadd(t0, x11, x9, t0, 3); __ lwu(x13, Address(t0, 3 * BytesPerInt)); __ profile_switch_case(x11, x10, x9); // continue execution __ bind(continue_execution); - __ grevw(x13, x13); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend __ add(xbcp, xbcp, x13); __ lbu(t0, Address(xbcp, 0)); __ dispatch_only(vtos, /*generate_poll*/true); @@ -2186,7 +2159,7 @@ void TemplateTable::fast_binaryswitch() { __ lwu(j, Address(array, -BytesPerInt)); // j = 
length(array) // Convert j into native byteordering - __ grev32(j, j); + __ revb_w(j, j); // And start Label entry; @@ -2202,10 +2175,9 @@ void TemplateTable::fast_binaryswitch() { // then [j = h] // else [i = h] // Convert array[h].match to native byte-ordering before compare - __ slli(temp, h, 3); - __ add(temp, array, temp); + __ shadd(temp, h, array, temp, 3); __ ld(temp, Address(temp, 0)); - __ grevw(temp, temp); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend Label L_done, L_greater; __ bge(key, temp, L_greater); @@ -2226,18 +2198,16 @@ void TemplateTable::fast_binaryswitch() { // end of binary search, result index is i (must check again!) Label default_case; // Convert array[i].match to native byte-ordering before compare - __ slli(temp, i, 3); - __ add(temp, array, temp); + __ shadd(temp, i, array, temp, 3); __ ld(temp, Address(temp, 0)); - __ grevw(temp, temp); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend __ bne(key, temp, default_case); // entry found -> j = offset - __ slli(temp, i, 3); - __ add(temp, array, temp); + __ shadd(temp, i, array, temp, 3); __ lwu(j, Address(temp, BytesPerInt)); __ profile_switch_case(i, key, array); - __ grevw(j, j); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend __ add(temp, xbcp, j); __ load_unsigned_byte(t0, Address(temp, 0)); @@ -2250,7 +2220,7 @@ void TemplateTable::fast_binaryswitch() { __ bind(default_case); __ profile_switch_default(i); __ lwu(j, Address(array, -2 * BytesPerInt)); - __ grevw(j, j); // reverse bytes in word (32bit) and sign-extend + __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend __ add(temp, xbcp, j); __ load_unsigned_byte(t0, Address(temp, 0)); @@ -2489,23 +2459,15 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr pop_and_check_object(obj); } - if (!UseBarriersForVolatile) { - Label notVolatile; - __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ bind(notVolatile); - } - __ add(off, obj, off); const Address field(off); Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; - __ slli(flags, raw_flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + - ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); + __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ bnez(flags, notByte); @@ -2738,9 +2700,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; - __ slli(flags, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + - ConstantPoolCacheEntry::tos_state_bits)); - __ srli(flags, flags, registerSize - ConstantPoolCacheEntry::tos_state_bits); + __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ bnez(flags, notByte); @@ -3139,14 +3101,6 @@ void 
TemplateTable::fast_accessfield(TosState state) __ add(x11, x10, x11); const Address field(x11, 0); - if (!UseBarriersForVolatile) { - Label notVolatile; - __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ bind(notVolatile); - } - // access field switch (bytecode()) { case Bytecodes::_fast_agetfield: @@ -3198,16 +3152,6 @@ void TemplateTable::fast_xaccess(TosState state) __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset()))); - if (!UseBarriersForVolatile) { - Label notVolatile; - __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + - ConstantPoolCacheEntry::flags_offset()))); - __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); - __ beqz(t0, notVolatile); - __ membar(MacroAssembler::AnyAny); - __ bind(notVolatile); - } - // make sure exception is reported in correct bcp range (getfield is // next instruction) __ addi(xbcp, xbcp, 1); @@ -3247,11 +3191,6 @@ void TemplateTable::fast_xaccess(TosState state) //----------------------------------------------------------------------------- // Calls -void TemplateTable::count_calls(Register method, Register temp) -{ - __ call_Unimplemented(); -} - void TemplateTable::prepare_invoke(int byte_no, Register method, // linked method (or i-klass) Register index, // itable index, MethodType, etc. @@ -3273,8 +3212,8 @@ void TemplateTable::prepare_invoke(int byte_no, assert(recv == noreg || recv == x12, ""); // setup registers & access constant pool cache - if (recv == noreg) { - recv = x12; + if (recv == noreg) { + recv = x12; } if (flags == noreg) { flags = x13; @@ -3296,6 +3235,7 @@ void TemplateTable::prepare_invoke(int byte_no, // since the parameter_size includes it. __ push_reg(x9); __ mv(x9, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); __ load_resolved_reference_at_index(index, x9); __ pop_reg(x9); __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
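Two idioms recur throughout the TemplateTable hunks above: shadd(Rd, Rs1, Rs2, tmp, shift) folds the earlier slli + add pair into one scaled-index address computation, and the slli/srli pair over XLEN isolates the tos_state bit-field from the constant-pool-cache flags word. The following is an illustrative sketch of that arithmetic only, in plain C++ rather than the MacroAssembler API; the constants used (XLEN = 64, an 8-byte element size, a 16-bit field at bit 4) are assumptions for the example, not values taken from the patch.

// Illustrative arithmetic only -- not HotSpot code.
#include <cassert>
#include <cstdint>

// shadd: Rd = Rs2 + (Rs1 << shift). With the Zba extension this is a single
// sh1add/sh2add/sh3add; without it, the macro needs an slli into a temporary
// register followed by an add, which is the sequence the hunks above replace.
static inline uint64_t shadd(uint64_t base, uint64_t index, unsigned shift) {
  return base + (index << shift);
}

// Isolate an unsigned bit-field [lsb, lsb + bits) from a 64-bit flags word the
// way the slli/srli pair over XLEN does for ConstantPoolCacheEntry::tos_state:
// shift the field up to the top of the register, then shift it back down.
static inline uint64_t extract_field(uint64_t flags, unsigned lsb, unsigned bits) {
  const unsigned xlen = 64;
  return (flags << (xlen - (lsb + bits))) >> (xlen - bits);
}

int main() {
  assert(shadd(0x1000, 5, 3) == 0x1000 + 5 * 8);            // 5th element of an 8-byte array
  assert(extract_field(0xABCDu << 4, 4, 16) == 0xABCDu);     // recover a 16-bit field at bit 4
  return 0;
}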
@@ -3305,23 +3245,21 @@ void TemplateTable::prepare_invoke(int byte_no, // load receiver if needed (note: no return address pushed yet) if (load_receiver) { __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 - __ slli(t0, recv, 3); - __ add(t0, esp, t0); + __ shadd(t0, recv, esp, t0, 3); __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); __ verify_oop(recv); } // compute return type - __ slli(t1, flags, registerSize - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); - __ srli(t1, t1, registerSize - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 + __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); + __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 // load return address { const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); __ mv(t0, table_addr); - __ slli(t1, t1, 3); - __ add(t0, t0, t1); - __ ld(lr, Address(t0, 0)); + __ shadd(t0, t1, t0, t1, 3); + __ ld(ra, Address(t0, 0)); } } @@ -3481,7 +3419,9 @@ void TemplateTable::invokeinterface(int byte_no) { __ profile_virtual_call(x13, x30, x9); // Get declaring interface class from method, and itable index - __ load_method_holder(x10, xmethod); + __ ld(x10, Address(xmethod, Method::const_offset())); + __ ld(x10, Address(x10, ConstMethod::constants_offset())); + __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); __ subw(xmethod, xmethod, Method::itable_index_max); __ negw(xmethod, xmethod); @@ -3668,7 +3608,7 @@ void TemplateTable::_new() { __ bnez(x13, loop); } - // initialize object hader only. + // initialize object header only. __ bind(initialize_header); if (UseBiasedLocking) { __ ld(t0, Address(x14, Klass::prototype_header_offset())); @@ -4045,8 +3985,7 @@ void TemplateTable::wide() { __ load_unsigned_byte(x9, at_bcp(1)); __ mv(t0, (address)Interpreter::_wentry_point); - __ slli(t1, x9, 3); - __ add(t0, t1, t0); + __ shadd(t0, x9, t0, t1, 3); __ ld(t0, Address(t0)); __ jr(t0); } @@ -4057,13 +3996,11 @@ void TemplateTable::multianewarray() { __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions // last dim is on top of stack; we want address of first one: // first_addr = last_addr + (ndims - 1) * wordSize - __ slli(c_rarg1, x10, 3); - __ add(c_rarg1, c_rarg1, esp); + __ shadd(c_rarg1, x10, esp, c_rarg1, 3); __ sub(c_rarg1, c_rarg1, wordSize); call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), c_rarg1); __ load_unsigned_byte(x11, at_bcp(3)); - __ slli(t0, x11, 3); - __ add(esp, esp, t0); + __ shadd(esp, x11, esp, t0, 3); } diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp index ca19023096f..6c89133de02 100644 --- a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp index 7d4ad0fb6ec..6bdce51506e 100644 --- a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,6 +23,7 @@ * */ +#include "precompiled.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" #include "runtime/os.inline.hpp" @@ -42,16 +43,11 @@ void VM_Version_Ext::initialize_cpu_information(void) { return; } - int core_id = -1; - int chip_id = -1; - int len = 0; - char* src_string = NULL; - _no_of_cores = os::processor_count(); _no_of_threads = _no_of_cores; _no_of_sockets = _no_of_cores; - snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Riscv64"); - snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Riscv64 %s", _features_string); + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); _initialized = true; } diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp index 31a25e097e8..711e4aeaf68 100644 --- a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 142e08393d2..5be0312733e 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,59 +24,14 @@ */ #include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "memory/resourceArea.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" -#include "runtime/stubCodeGenerator.hpp" #include "runtime/vm_version.hpp" #include "utilities/formatBuffer.hpp" #include "utilities/macros.hpp" #include OS_HEADER_INLINE(os) -#include -#include - -#ifndef HWCAP_ISA_I -#define HWCAP_ISA_I (1 << ('I' - 'A')) -#endif - -#ifndef HWCAP_ISA_M -#define HWCAP_ISA_M (1 << ('M' - 'A')) -#endif - -#ifndef HWCAP_ISA_A -#define HWCAP_ISA_A (1 << ('A' - 'A')) -#endif - -#ifndef HWCAP_ISA_F -#define HWCAP_ISA_F (1 << ('F' - 'A')) -#endif - -#ifndef HWCAP_ISA_D -#define HWCAP_ISA_D (1 << ('D' - 'A')) -#endif - -#ifndef HWCAP_ISA_C -#define HWCAP_ISA_C (1 << ('C' - 'A')) -#endif - -#ifndef HWCAP_ISA_V -#define HWCAP_ISA_V (1 << ('V' - 'A')) -#endif - -#define read_csr(csr) \ -({ \ - register unsigned long __v; \ - __asm__ __volatile__ ("csrr %0, %1" \ - : "=r" (__v) \ - : "i" (csr) \ - : "memory"); \ - __v; \ -}) - address VM_Version::_checkvext_fault_pc = NULL; address VM_Version::_checkvext_continuation_pc = NULL; @@ -120,55 +75,13 @@ class VM_Version_StubGenerator: public StubCodeGenerator { const char* VM_Version::_uarch = ""; uint32_t VM_Version::_initial_vector_length = 0; -uint32_t VM_Version::get_current_vector_length() { - assert(_features & CPU_V, "should not call this"); - return (uint32_t)read_csr(CSR_VLENB); -} - -void VM_Version::get_os_cpu_info() { - - uint64_t auxv = getauxval(AT_HWCAP); - - assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); - assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); - assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); - assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); - assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); - assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); - assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); - - // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. - // Availability for those extensions could not be queried from HWCAP. - // TODO: Add proper detection for those extensions. 
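The HWCAP_ISA_* defines deleted here reappear, together with HWCAP_ISA_B, in the new os_cpu/linux_riscv/vm_version_linux_riscv.cpp further down in this patch: each single-letter ISA extension is reported by the kernel as bit ('X' - 'A') of getauxval(AT_HWCAP), and the Feature_Flag bit numbers in vm_version_riscv.hpp are chosen to coincide with that layout, which is what the (STATIC_)ASSERTs check. A minimal standalone sketch of that correspondence, assuming a Linux/glibc host; the printed message is purely illustrative.

// Standalone sketch of the HWCAP bit layout; the checked values match the
// CPU_FEATURE_FLAGS table in this patch (I=8, M=12, V=21, B=1, ...).
#include <cstdio>
#include <sys/auxv.h>   // getauxval, AT_HWCAP (Linux/glibc)

static constexpr unsigned long isa_bit(char ext) { return 1UL << (ext - 'A'); }

int main() {
  static_assert(isa_bit('I') == (1UL << 8),  "base ISA");
  static_assert(isa_bit('M') == (1UL << 12), "mul/div");
  static_assert(isa_bit('V') == (1UL << 21), "vector");
  static_assert(isa_bit('B') == (1UL << 1),  "bitmanip, probed the same way here");

  unsigned long hwcap = getauxval(AT_HWCAP);
  std::printf("rv64 base ISA 'I' reported: %s\n", (hwcap & isa_bit('I')) ? "yes" : "no");
  return 0;
}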
- _features = auxv & ( - HWCAP_ISA_I | - HWCAP_ISA_M | - HWCAP_ISA_A | - HWCAP_ISA_F | - HWCAP_ISA_D | - HWCAP_ISA_C | - HWCAP_ISA_V); - - if (FILE *f = fopen("/proc/cpuinfo", "r")) { - char buf[512], *p; - while (fgets(buf, sizeof (buf), f) != NULL) { - if ((p = strchr(buf, ':')) != NULL) { - if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { - char* uarch = os::strdup(p + 2); - uarch[strcspn(uarch, "\n")] = '\0'; - _uarch = uarch; - break; - } - } - } - fclose(f); - } -} +void VM_Version::initialize() { + get_os_cpu_info(); -void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseFMA)) { FLAG_SET_DEFAULT(UseFMA, true); } + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); } @@ -209,11 +122,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } - if (UsePopCountInstruction) { - warning("Pop count instructions are not available on this CPU."); - FLAG_SET_DEFAULT(UsePopCountInstruction, false); - } - if (UseCRC32Intrinsics) { warning("CRC32 intrinsics are not available on this CPU."); FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); @@ -252,35 +160,80 @@ void VM_Version::get_processor_features() { } } + if (UseRVB && !(_features & CPU_B)) { + warning("RVB is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVB, false); + } + + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); + } + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); } + if (UseRVB) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } + } else { + FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + + char buf[512]; + buf[0] = '\0'; + if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); + strcat(buf, "rv64"); +#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); + CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) +#undef ADD_FEATURE_IF_SUPPORTED + + _features_string = os::strdup(buf); + #ifdef COMPILER2 - get_c2_processor_features(); + c2_initialize(); #endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); + + FLAG_SET_DEFAULT(UseMembar, true); } #ifdef COMPILER2 -void VM_Version::get_c2_processor_features() { - // lack of cmove in riscv64 +void VM_Version::c2_initialize() { if (UseCMoveUnconditionally) { FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); } + if (ConditionalMoveLimit > 0) { FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); } - // disable vector - if (FLAG_IS_DEFAULT(UseSuperWord)) { - FLAG_SET_DEFAULT(UseSuperWord, false); + if (!UseRVV) { + FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); } - if (FLAG_IS_DEFAULT(MaxVectorSize)) { + + if (!UseRVV && MaxVectorSize) { FLAG_SET_DEFAULT(MaxVectorSize, 0); } - if (MaxVectorSize > 0) { - warning("Vector instructions are not available on this CPU"); - FLAG_SET_DEFAULT(MaxVectorSize, 0); + + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; + } else if (MaxVectorSize < 16) { + warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); + UseRVV = false; + } else if (is_power_of_2(MaxVectorSize)) { + if (MaxVectorSize > _initial_vector_length) { + warning("Current system only supports max RVV vector length %d. 
Set MaxVectorSize to %d", + _initial_vector_length, _initial_vector_length); + } + MaxVectorSize = _initial_vector_length; + } else { + vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } } if (UseRVV) { @@ -292,10 +245,25 @@ void VM_Version::get_c2_processor_features() { if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); } -} -#endif // COMPILER2 -void VM_Version::initialize() { - get_processor_features(); - UNSUPPORTED_OPTION(CriticalJNINatives); + if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); + } + + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } } +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index e3302422fa5..85369ce0493 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -1,7 +1,7 @@ /* * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,33 +33,39 @@ #include "utilities/sizes.hpp" class VM_Version : public Abstract_VM_Version { -public: - // Initialization - static void initialize(); +#ifdef COMPILER2 +private: + static void c2_initialize(); +#endif // COMPILER2 protected: static const char* _uarch; static uint32_t _initial_vector_length; - static void get_processor_features(); - static uint32_t get_current_vector_length(); static void get_os_cpu_info(); + static uint32_t get_current_vector_length(); + +public: + // Initialization + static void initialize(); enum Feature_Flag { -#define CPU_FEATURE_FLAGS(decl) \ - decl(I, "i", 8) \ - decl(M, "m", 12) \ - decl(A, "a", 0) \ - decl(F, "f", 5) \ - decl(D, "d", 3) \ - decl(C, "c", 2) \ - decl(V, "v", 21) +#define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ + decl(M, "m", 12) \ + decl(A, "a", 0) \ + decl(F, "f", 5) \ + decl(D, "d", 3) \ + decl(C, "c", 2) \ + decl(V, "v", 21) \ + decl(B, "b", 1) #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), - CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) + CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) #undef DECLARE_CPU_FEATURE_FLAG }; -public: + static void initialize_cpu_information(void); + static bool is_checkvext_fault(address pc) { return pc != NULL && pc == _checkvext_fault_pc; } @@ -72,6 +78,7 @@ class VM_Version : public Abstract_VM_Version { static address _checkvext_fault_pc; static address _checkvext_continuation_pc; + #ifdef COMPILER2 private: static void get_c2_processor_features(); diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp index c9152a67b81..c4338715f95 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp @@ -1,7 +1,6 @@ /* * Copyright (c) 
2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,8 +28,8 @@ #include "code/vmreg.hpp" void VMRegImpl::set_regName() { - Register reg = ::as_Register(0); int i = 0; + Register reg = ::as_Register(0); for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { regName[i++] = reg->name(); @@ -46,7 +45,7 @@ void VMRegImpl::set_regName() { freg = freg->successor(); } - for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { regName[i] = "NON-GPR-FPR"; } } diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp index 8454b811cb6..6f613a8f11a 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,35 +34,20 @@ inline bool is_FloatRegister() { return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; } -inline bool is_VectorRegister() { - return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -} - inline Register as_Register() { - assert( is_Register(), "must be"); + assert(is_Register(), "must be"); return ::as_Register(value() / RegisterImpl::max_slots_per_register); } inline FloatRegister as_FloatRegister() { - assert( is_FloatRegister() && is_even(value()), "must be" ); + assert(is_FloatRegister() && is_even(value()), "must be"); return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / FloatRegisterImpl::max_slots_per_register); } -inline VectorRegister as_VectorRegister() { - assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); - return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / - VectorRegisterImpl::max_slots_per_register); -} - inline bool is_concrete() { assert(is_reg(), "must be"); - if (is_VectorRegister()) { - int base = value() - ConcreteRegisterImpl::max_fpr; - return (base % VectorRegisterImpl::max_slots_per_register) == 0; - } else { - return is_even(value()); - } + return is_even(value()); } #endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp index 135d0c62a2b..06b70020b4b 100644 --- a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -1,7 +1,6 @@ /* * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,19 +26,19 @@ #ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP #define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -inline VMReg RegisterImpl::as_VMReg() { - if( this == noreg ) { +inline VMReg RegisterImpl::as_VMReg() const { + if (this == noreg) { return VMRegImpl::Bad(); } return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); } -inline VMReg FloatRegisterImpl::as_VMReg() { +inline VMReg FloatRegisterImpl::as_VMReg() const { return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ConcreteRegisterImpl::max_gpr); } -inline VMReg VectorRegisterImpl::as_VMReg() { +inline VMReg VectorRegisterImpl::as_VMReg() const { return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ConcreteRegisterImpl::max_fpr); } diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp index c95b968a167..78b81138003 100644 --- a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -254,7 +254,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { } int VtableStub::pd_code_alignment() { - // riscv64 cache line size is 64 bytes, but we want to limit alignment loss. + // RISCV cache line size is not an architected constant. We just align on word size. const unsigned int icache_line_size = wordSize; return icache_line_size; } diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index ec6550c2cef..2ed532fd9d7 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2019, SAP SE. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2019 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index a336d2a35c1..965ffaa604f 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index c09480abe0e..1c4887eb7e4 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -2042,8 +2042,6 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_PPC64; #elif (defined __powerpc__) static Elf32_Half running_arch_code=EM_PPC; -#elif (defined riscv) - static Elf32_Half running_arch_code=EM_RISCV; #elif (defined AARCH64) static Elf32_Half running_arch_code=EM_AARCH64; #elif (defined ARM) diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp index e8363fd617a..bc5d1fe24a2 100644 --- a/src/hotspot/os/posix/os_posix.cpp +++ b/src/hotspot/os/posix/os_posix.cpp @@ -117,10 +117,6 @@ void os::check_dump_limit(char* buffer, size_t bufferSize) { VMError::record_coredump_status(buffer, success); } -// Native stack isn't walkable for RISCV this way. -// Native C frame and Java frame have different structure on RISCV. -// A seperate implementation is provided under linux_riscv for RISCV. -#if !defined(RISCV) || defined(ZERO) int os::get_native_stack(address* stack, int frames, int toSkip) { int frame_idx = 0; int num_of_frames; // number of frames captured @@ -147,7 +143,7 @@ int os::get_native_stack(address* stack, int frames, int toSkip) { return num_of_frames; } -#endif + bool os::unsetenv(const char* name) { assert(name != NULL, "Null pointer"); diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp index 55038d243e9..f2610af6cdd 100644 --- a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp @@ -24,5 +24,3 @@ */ // nothing required here - - diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp index 50ae9121888..9b8b1a31774 100644 --- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp @@ -30,13 +30,16 @@ // Implementation of class atomic +// Note that memory_order_conservative requires a full barrier after atomic stores. +// See https://patchwork.kernel.org/patch/3575821/ + #define FULL_MEM_BARRIER __sync_synchronize() #define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); template struct Atomic::PlatformAdd - : public Atomic::AddAndFetch > + : Atomic::FetchAndAdd > { template D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp index 2bfbeb01148..28868c76406 100644 --- a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp @@ -1,6 +1,5 @@ /* * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp similarity index 85% rename from src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp rename to src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp index 1e9187f6f89..bdf36d6b4c3 100644 --- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,14 +23,14 @@ * */ -#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP +#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { (void)memmove(to, from, count * HeapWordSize); } -static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { switch (count) { case 8: to[7] = from[7]; // fall through case 7: to[6] = from[6]; // fall through @@ -42,20 +42,28 @@ static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, case 1: to[0] = from[0]; // fall through case 0: break; default: - if(is_atomic) { - while (count-- > 0) { *to++ = *from++; } - } else { - memcpy(to, from, count * HeapWordSize); - } + memcpy(to, from, count * HeapWordSize); + break; } } -static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { - pd_disjoint_words_helper(from, to, count, false); -} - static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { - pd_disjoint_words_helper(from, to, count, true); + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } } static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { @@ -113,4 +121,4 @@ static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t _Copy_arrayof_conjoint_jlongs(from, to, count); } -#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp index 48ddf796a08..297414bfcd5 100644 --- a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -40,7 +40,4 @@ define_pd_global(uintx, JVMInvokeMethodSlack, 8192); // Used on 64 bit platforms for UseCompressedOops base address define_pd_global(uintx, HeapBaseMinAddress, 2 * G); -class Thread; -extern __thread Thread *riscv64_currentThread; - #endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp index 7477206a8b4..5b5d35553f7 100644 --- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp @@ -49,6 +49,7 @@ inline void OrderAccess::fence() { FULL_MEM_BARRIER; } + template struct OrderAccess::PlatformOrderedLoad { diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp index c76d6b48036..ea3a57e1da9 100644 --- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -108,7 +108,7 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { // frames. Currently we don't do that on Linux, so it's the same as // os::fetch_frame_from_context(). ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, - const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { assert(thread != NULL, "just checking"); assert(ret_sp != NULL, "just checking"); @@ -118,9 +118,9 @@ ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, } ExtendedPC os::fetch_frame_from_context(const void* ucVoid, - intptr_t** ret_sp, intptr_t** ret_fp) { + intptr_t** ret_sp, intptr_t** ret_fp) { - ExtendedPC epc; + ExtendedPC epc; const ucontext_t* uc = (const ucontext_t*)ucVoid; if (uc != NULL) { @@ -173,14 +173,14 @@ bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* // stack overflow handling return false; } else { - // In compiled code, the stack banging is performed before LR - // has been saved in the frame. LR is live, and SP and FP + // In compiled code, the stack banging is performed before RA + // has been saved in the frame. RA is live, and SP and FP // belong to the caller. - intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); - intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); - address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - - NativeInstruction::instruction_size); - *fr = frame(frame_sp, frame_fp, frame_pc); + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] + - NativeInstruction::instruction_size); + *fr = frame(sp, fp, pc); if (!fr->is_java_frame()) { assert(fr->safe_for_sender(thread), "Safety check"); assert(!fr->is_first_frame(), "Safety check"); @@ -195,14 +195,14 @@ bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* // By default, gcc always saves frame pointer rfp on this stack. This // may get turned off by -fomit-frame-pointer. 
frame os::get_sender_for_C_frame(frame* fr) { - return frame(fr->c_frame_sender_sp(), fr->c_frame_link(), fr->c_frame_sender_pc()); + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); } NOINLINE frame os::current_frame() { intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); - if(sender_sp != NULL) { + if (sender_sp != NULL) { frame myframe((intptr_t*)os::current_stack_pointer(), - sender_sp[frame::c_frame_link_offset], + sender_sp[frame::link_offset], CAST_FROM_FN_PTR(address, os::current_frame)); if (os::is_first_C_frame(&myframe)) { // stack is not walkable @@ -216,83 +216,7 @@ NOINLINE frame os::current_frame() { } } -bool os::is_first_C_frame(frame* fr) { - // Load up sp, fp, sender sp and sender fp, check for reasonable values. - // Check usp first, because if that's bad the other accessors may fault - // on some architectures. Ditto ufp second, etc. - uintptr_t fp_align_mask = (uintptr_t)(sizeof(address) - 1); - // sp on amd can be 32 bit aligned. - uintptr_t sp_align_mask = (uintptr_t)(sizeof(int) - 1); - - uintptr_t usp = (uintptr_t)fr->sp(); - if ((usp & sp_align_mask) != 0) { - return true; - } - - uintptr_t ufp = (uintptr_t)fr->fp(); - if ((ufp & fp_align_mask) != 0) { - return true; - } - - uintptr_t old_sp = (uintptr_t)fr->c_frame_sender_sp(); - if ((old_sp & sp_align_mask) != 0) { - return true; - } - if (old_sp == 0 || old_sp == (uintptr_t)-1) { - return true; - } - - uintptr_t old_fp = (uintptr_t)fr->c_frame_link(); - if ((old_fp & fp_align_mask) != 0) { - return true; - } - if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) { - return true; - } - - // stack grows downwards; if old_fp is below current fp or if the stack - // frame is too large, either the stack is corrupted or fp is not saved - // on stack (i.e. on x86, ebp may be used as general register). The stack - // is not walkable beyond current frame. 
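The block being removed here duplicated the generic native-stack walker, and per 8282306 the sanity checks move back to the shared implementation (see the os_posix.cpp hunk earlier in this patch). For reference, the following is a standalone restatement of the checks the removed os::is_first_C_frame() applied before following a frame-pointer link; it is a simplified sketch, not the HotSpot frame class, and it returns true when stepping to the sender looks safe (the original returned true for the opposite, "stop here" case).

// Simplified restatement of the removed frame-pointer sanity checks.
#include <cstdint>
#include <cstddef>

static bool is_walkable_link(uintptr_t cur_fp, uintptr_t sender_sp, uintptr_t sender_fp) {
  const uintptr_t fp_align_mask = sizeof(void*) - 1;
  const uintptr_t sp_align_mask = sizeof(int)   - 1;

  if (sender_sp & sp_align_mask) return false;                       // misaligned sp
  if (sender_sp == 0 || sender_sp == (uintptr_t)-1) return false;

  if (sender_fp & fp_align_mask) return false;                       // misaligned fp
  if (sender_fp == 0 || sender_fp == (uintptr_t)-1 || sender_fp == cur_fp) return false;

  // The stack grows down: a sender fp below the current fp, or an implausibly
  // large frame, means fp is not being used as a frame pointer and the stack
  // is not walkable beyond the current frame.
  if (sender_fp < cur_fp) return false;
  if (sender_fp - cur_fp > 64 * 1024) return false;

  return true;
}

int main() {
  uintptr_t dummy[4] = {0, 0, 0, 0};
  uintptr_t fp = (uintptr_t)&dummy[0];
  return is_walkable_link(fp, fp + 16, fp + 32) ? 0 : 1;
}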
- if (old_fp < ufp) { - return true; - } - if (old_fp - ufp > 64 * K) { - return true; - } - - return false; -} - -int os::get_native_stack(address* stack, int frames, int toSkip) { - int frame_idx = 0; - int num_of_frames = 0; // number of frames captured - frame fr = os::current_frame(); - while (fr.pc() && frame_idx < frames) { - if (toSkip > 0) { - toSkip --; - } else { - stack[frame_idx ++] = fr.pc(); - } - if (fr.fp() == NULL || fr.cb() != NULL || - fr.c_frame_sender_pc() == NULL || os::is_first_C_frame(&fr)) { - break; - } - - if (fr.c_frame_sender_pc() && !os::is_first_C_frame(&fr)) { - fr = os::get_sender_for_C_frame(&fr); - } else { - break; - } - } - num_of_frames = frame_idx; - for (; frame_idx < frames; frame_idx ++) { - stack[frame_idx] = NULL; - } - - return num_of_frames; -} - +// Utility functions extern "C" JNIEXPORT int JVM_handle_linux_signal(int sig, siginfo_t* info, @@ -327,37 +251,43 @@ JVM_handle_linux_signal(int sig, #ifdef CAN_SHOW_REGISTERS_ON_ASSERT if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { - handle_assert_poison_fault(ucVoid, info->si_addr); - return 1; + if (handle_assert_poison_fault(ucVoid, info->si_addr)) { + return 1; + } } #endif JavaThread* thread = NULL; VMThread* vmthread = NULL; if (os::Linux::signal_handlers_are_installed) { - if (t != NULL ) { + if (t != NULL ){ if(t->is_Java_thread()) { - thread = (JavaThread*)t; - } else if(t->is_VM_thread()) { + thread = (JavaThread *) t; + } + else if(t->is_VM_thread()){ vmthread = (VMThread *)t; } } } + // Handle SafeFetch faults + if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + } + // decide if this trap can be handled by a stub address stub = NULL; - address pc = NULL; + address pc = NULL; //%note os_trap_1 if (info != NULL && uc != NULL && thread != NULL) { pc = (address) os::Linux::ucontext_get_pc(uc); - if (StubRoutines::is_safefetch_fault(pc)) { - os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); - return 1; - } - // Handle ALL stack overflow variations here if (sig == SIGSEGV) { address addr = (address) info->si_addr; @@ -372,12 +302,12 @@ JVM_handle_linux_signal(int sig, if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { assert(fr.is_java_frame(), "Must be a Java frame"); frame activation = - SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); + SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); if (activation.sp() != NULL) { thread->disable_stack_reserved_zone(); if (activation.is_interpreted_frame()) { thread->set_reserved_stack_activation((address)( - activation.fp() + frame::interpreter_frame_initial_sp_offset)); + activation.fp() + frame::interpreter_frame_initial_sp_offset)); } else { thread->set_reserved_stack_activation((address)activation.unextended_sp()); } @@ -410,14 +340,14 @@ JVM_handle_linux_signal(int sig, // current thread was created by user code with MAP_GROWSDOWN flag // and then attached to VM. See notes in os_linux.cpp. 
if (thread->osthread()->expanding_stack() == 0) { - thread->osthread()->set_expanding_stack(); - if (os::Linux::manually_expand_stack(thread, addr)) { - thread->osthread()->clear_expanding_stack(); - return 1; - } - thread->osthread()->clear_expanding_stack(); + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); } else { - fatal("recursive segv. expanding stack."); + fatal("recursive segv. expanding stack."); } } } @@ -455,7 +385,7 @@ JVM_handle_linux_signal(int sig, stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } else if (sig == SIGFPE && - (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { stub = SharedRuntime:: continuation_for_implicit_exception(thread, @@ -463,7 +393,7 @@ JVM_handle_linux_signal(int sig, SharedRuntime:: IMPLICIT_DIVIDE_BY_ZERO); } else if (sig == SIGSEGV && - !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } @@ -477,9 +407,9 @@ JVM_handle_linux_signal(int sig, // jni_fast_GetField can trap at certain pc's if a GC kicks in // and the heap gets shrunk before the field access. if ((sig == SIGSEGV) || (sig == SIGBUS)) { - address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); - if (addr_slow != (address)-1) { - stub = addr_slow; + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; } } @@ -497,9 +427,7 @@ JVM_handle_linux_signal(int sig, if (stub != NULL) { // save all thread context in case we need to restore it - if (thread != NULL) { - thread->set_saved_exception_pc(pc); - } + if (thread != NULL) thread->set_saved_exception_pc(pc); os::Linux::ucontext_set_pc(uc, stub); return true; @@ -507,7 +435,7 @@ JVM_handle_linux_signal(int sig, // signal-chaining if (os::Linux::chained_handler(sig, info, ucVoid)) { - return true; + return true; } if (!abort_if_unrecognized) { @@ -541,7 +469,6 @@ int os::Linux::get_fpu_control_word(void) { void os::Linux::set_fpu_control_word(int fpu_control) { } - //////////////////////////////////////////////////////////////////////////////// // thread stack @@ -586,7 +513,7 @@ void os::print_context(outputStream *st, const void *context) { intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); - print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 8 * sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); st->cr(); // Note: it may be unsafe to inspect memory near pc. 
For example, pc may diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp index 328fb389f67..ccceed643ed 100644 --- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp @@ -24,6 +24,7 @@ */ #include "precompiled.hpp" +#include "memory/metaspaceShared.hpp" #include "runtime/frame.inline.hpp" #include "runtime/thread.inline.hpp" @@ -62,12 +63,18 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) intptr_t* ret_fp = NULL; intptr_t* ret_sp = NULL; ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, - &ret_sp, &ret_fp); + &ret_sp, &ret_fp); if (addr.pc() == NULL || ret_sp == NULL ) { // ucontext wasn't useful return false; } + if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { + // In the middle of a trampoline call. Bail out for safety. + // This happens rarely so shouldn't affect profiling. + return false; + } + frame ret_frame(ret_sp, ret_fp, addr.pc()); if (!ret_frame.safe_for_sender(this)) { #ifdef COMPILER2 @@ -91,4 +98,3 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) } void JavaThread::cache_global_variables() { } - diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp index 2f3e2d591fb..4b91fa855ae 100644 --- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,9 +58,6 @@ private: bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); public: - - static Thread *riscv64_get_thread_helper(); - // These routines are only used on cpu architectures that // have separate register stacks (Itanium). static bool register_stack_overflow() { return false; } diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 index 00000000000..60260854db6 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "runtime/os.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/vm_version.hpp" + +#include +#include + +#ifndef HWCAP_ISA_I +#define HWCAP_ISA_I (1 << ('I' - 'A')) +#endif + +#ifndef HWCAP_ISA_M +#define HWCAP_ISA_M (1 << ('M' - 'A')) +#endif + +#ifndef HWCAP_ISA_A +#define HWCAP_ISA_A (1 << ('A' - 'A')) +#endif + +#ifndef HWCAP_ISA_F +#define HWCAP_ISA_F (1 << ('F' - 'A')) +#endif + +#ifndef HWCAP_ISA_D +#define HWCAP_ISA_D (1 << ('D' - 'A')) +#endif + +#ifndef HWCAP_ISA_C +#define HWCAP_ISA_C (1 << ('C' - 'A')) +#endif + +#ifndef HWCAP_ISA_V +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + +#ifndef HWCAP_ISA_B +#define HWCAP_ISA_B (1 << ('B' - 'A')) +#endif + +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) \ + : "memory"); \ + __v; \ +}) + +uint32_t VM_Version::get_current_vector_length() { + assert(_features & CPU_V, "should not call this"); + return (uint32_t)read_csr(CSR_VLENB); +} + +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + + STATIC_ASSERT(CPU_I == HWCAP_ISA_I); + STATIC_ASSERT(CPU_M == HWCAP_ISA_M); + STATIC_ASSERT(CPU_A == HWCAP_ISA_A); + STATIC_ASSERT(CPU_F == HWCAP_ISA_F); + STATIC_ASSERT(CPU_D == HWCAP_ISA_D); + STATIC_ASSERT(CPU_C == HWCAP_ISA_C); + STATIC_ASSERT(CPU_V == HWCAP_ISA_V); + STATIC_ASSERT(CPU_B == HWCAP_ISA_B); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + HWCAP_ISA_A | + HWCAP_ISA_F | + HWCAP_ISA_D | + HWCAP_ISA_C | + HWCAP_ISA_V | + HWCAP_ISA_B); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { + if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; + break; + } + } + } + fclose(f); + } +} diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 6362a015c3c..36f89e62a0e 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -251,8 +251,7 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) - : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) , _block(block) , _ublock(NULL) @@ -260,8 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : - LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(stub->entry()) , _block(NULL) , _ublock(NULL) @@ -269,8 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) - : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _type(type) + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) , _block(block) , _ublock(ublock) diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 8daa0a7fd49..568354420be 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1585,8 +1585,8 @@ class LIR_Op2: public LIR_Op { : LIR_Op(code, LIR_OprFact::illegalOpr, info) , _opr1(opr1) , _opr2(opr2) - , _type(type) , _fpu_stack_size(0) + , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) @@ -1601,13 +1601,13 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) + , _condition(condition) , _fpu_stack_size(0) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) - , _tmp5(LIR_OprFact::illegalOpr) - , _condition(condition) { + , _tmp5(LIR_OprFact::illegalOpr) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1674,7 +1674,6 @@ class LIR_OpBranch: public LIR_Op2 { friend class LIR_OpVisitState; private: - BasicType _type; Label* _label; BlockBegin* _block; // if this is a branch to a block, this is the block BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -1682,8 +1681,7 @@ class LIR_OpBranch: public LIR_Op2 { public: LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) - : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) - , _type(type) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) , _label(lbl) , _block(NULL) , _ublock(NULL) @@ -1703,11 +1701,10 @@ class LIR_OpBranch: public LIR_Op2 { set_condition(cond); } - BasicType type() const { return _type; } Label* label() const { return _label; } BlockBegin* block() const { return _block; } BlockBegin* ublock() const { return _ublock; } - CodeStub* stub() const { return _stub; } + CodeStub* stub() const { return _stub; } void change_block(BlockBegin* b); void change_ublock(BlockBegin* b); @@ -1805,12 +1802,12 @@ class LIR_Op4: public LIR_Op { , _opr3(opr3) , _opr4(opr4) , _type(type) - , _condition(condition) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) - , _tmp5(LIR_OprFact::illegalOpr) { + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -2311,19 +2308,17 @@ class LIR_List: public CompilationResourceObj { void jump(CodeStub* stub) { append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub)); } - void branch(LIR_Condition cond, BasicType type, Label* lbl) { - append(new LIR_OpBranch(cond, type, lbl)); - } - // Should not be used for fp comparisons + void branch(LIR_Condition cond, BasicType type, Label* lbl) { append(new LIR_OpBranch(cond, type, lbl)); } void branch(LIR_Condition cond, BasicType type, BlockBegin* block) { + assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons"); append(new LIR_OpBranch(cond, type, block)); } - // Should not be used for fp comparisons - void branch(LIR_Condition cond, BasicType type, CodeStub* stub) { + void branch(LIR_Condition cond, BasicType type, CodeStub* stub) { + assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons"); append(new LIR_OpBranch(cond, type, stub)); } - // Should only be used for fp comparisons void branch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* unordered) { + assert(type == T_FLOAT || type == T_DOUBLE, "fp comparisons only"); append(new LIR_OpBranch(cond, type, block, unordered)); } diff --git 
a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 1043ca45f9e..33ed6ac041b 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -490,7 +490,6 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, } else { cmp_reg_mem(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); - // forward branch __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch } } @@ -499,23 +498,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index); if (index->is_constant()) { -#ifdef RISCV64 - LIR_Opr left = new_register(T_INT); - LIR_Opr right = LIR_OprFact::intConst(index->as_jint()); - __ load(generate_address(buffer, java_nio_Buffer::limit_offset(), T_INT), left, info); -#else cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); -#endif __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch } else { -#ifdef RISCV64 - LIR_Opr right = new_register(T_INT); - __ load(generate_address(buffer, java_nio_Buffer::limit_offset(), T_INT), right, info); -#else cmp_reg_mem(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), T_INT, info); -#endif - // forward branch __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch } __ move(index, result); @@ -1355,6 +1342,7 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); } + // Example: Thread.currentThread() void LIRGenerator::do_currentThread(Intrinsic* x) { assert(x->number_of_arguments() == 0, "wrong type"); @@ -1362,6 +1350,7 @@ void LIRGenerator::do_currentThread(Intrinsic* x) { __ move_wide(new LIR_Address(getThreadPointer(), in_bytes(JavaThread::threadObj_offset()), T_OBJECT), reg); } + void LIRGenerator::do_RegisterFinalizer(Intrinsic* x) { assert(x->number_of_arguments() == 1, "wrong type"); LIRItem receiver(x->argument_at(0), this); @@ -1812,11 +1801,11 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeStub* stub = new RangeCheckStub(info, index.result()); if (index.result()->is_constant()) { cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + __ branch(lir_cond_belowEqual, T_INT, stub); } else { cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + __ branch(lir_cond_aboveEqual, T_INT, stub); } __ move(index.result(), result); } else { diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index ef5a01003eb..c1de495260b 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 144cbe3381b..5e6bb08c50b 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -243,7 +243,7 @@ void Runtime1::generate_blob_for(BufferBlob* buffer_blob, StubID id) { case handle_exception_nofpu_id: // Unused on sparc #endif #if defined(RISCV) - // TODO: Wisp + // TODO:Wisp case monitorexit_nofpu_proxy_id: case monitorexit_proxy_id: #endif diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index f49682741ea..6d377fa005d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ #include "utilities/defaultStream.hpp" void ShenandoahArguments::initialize() { -#if !(defined AARCH64 || defined RISCV64 || defined AMD64 || defined IA32) +#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp index c395d615c19..2bc6fecbf42 100644 --- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 839c1f5275c..45835830843 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -338,13 +338,13 @@ class Matcher : public PhaseTransform { // Vector ideal reg static const uint vector_ideal_reg(int len); + // Does the CPU supports vector variable shift instructions? + static bool supports_vector_variable_shifts(void); + // Vector element basic type static BasicType vector_element_basic_type(const Node* n); static BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd); - // Does the CPU supports vector variable shift instructions? - static bool supports_vector_variable_shifts(void); - // CPU supports misaligned vectors store/load. static const bool misaligned_vectors_ok(); diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp index c64d0879592..bc856d4b617 100644 --- a/src/hotspot/share/opto/regmask.hpp +++ b/src/hotspot/share/opto/regmask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index da738e5d55c..1e8b7319bb3 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -3852,7 +3852,7 @@ void Arguments::handle_extra_cms_flags(const char* msg) { void Arguments::check_arguments_for_riscv64() { UNSUPPORTED_RISCV64_OPTS(EnableCoroutine || UseWispMonitor); } -#endif // +#endif // RISCV64 // Parse entry point called from JNI_CreateJavaVM diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index e74160fdfe0..d19b43d19ac 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -361,7 +361,7 @@ JVMFlag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { } int minimum_alignment = 16; -#if defined(SPARC) || (defined(X86) && !defined(AMD64)) || defined(RISCV) +#if defined(SPARC) || (defined(X86) && !defined(AMD64)) minimum_alignment = 4; #elif defined(S390) minimum_alignment = 2; diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index 93b389e6808..60ca8379e4a 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1229,11 +1229,6 @@ bool is_pointer_bad(intptr_t* ptr) { return !is_aligned(ptr, sizeof(uintptr_t)) || !os::is_readable_pointer(ptr); } -// Native stack isn't walkable for RISCV this way. -// Native C frame and Java frame have different structure on RISCV. -// A seperate implementation is provided under linux_riscv for RISCV. - -#if !defined(RISCV) || defined(ZERO) // Looks like all platforms can use the same function to check if C // stack is walkable beyond current frame. The check for fp() is not // necessary on Sparc, but it's harmless. @@ -1265,7 +1260,7 @@ bool os::is_first_C_frame(frame* fr) { return false; } -#endif + // Set up the boot classpath. diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index 3ec62dd3953..46eefe3583b 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp index 9fed46f42f4..b91f31fd26e 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { set_has_async_exception(); } -#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) +#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } diff --git a/src/hotspot/share/runtime/tieredThresholdPolicy.cpp b/src/hotspot/share/runtime/tieredThresholdPolicy.cpp index c5fe1cbc19d..0e9e90a64b4 100644 --- a/src/hotspot/share/runtime/tieredThresholdPolicy.cpp +++ b/src/hotspot/share/runtime/tieredThresholdPolicy.cpp @@ -254,7 +254,7 @@ void TieredThresholdPolicy::initialize() { } #endif -#if defined SPARC || defined AARCH64 || defined RISCV64 +#if defined SPARC || defined AARCH64 if (FLAG_IS_DEFAULT(InlineSmallCode)) { FLAG_SET_DEFAULT(InlineSmallCode, 2500); } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c index 0d834302c57..45a927fb5ee 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef riscv64 +#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? 
array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif #if defined(ppc64) || defined(ppc64le) #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif @@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(riscv64) +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg + + regs[REG_INDEX(PC)] = gregs.pc; + regs[REG_INDEX(LR)] = gregs.ra; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(R3)] = gregs.gp; + regs[REG_INDEX(R4)] = gregs.tp; + regs[REG_INDEX(R5)] = gregs.t0; + regs[REG_INDEX(R6)] = gregs.t1; + regs[REG_INDEX(R7)] = gregs.t2; + regs[REG_INDEX(R8)] = gregs.s0; + regs[REG_INDEX(R9)] = gregs.s1; + regs[REG_INDEX(R10)] = gregs.a0; + regs[REG_INDEX(R11)] = gregs.a1; + regs[REG_INDEX(R12)] = gregs.a2; + regs[REG_INDEX(R13)] = gregs.a3; + regs[REG_INDEX(R14)] = gregs.a4; + regs[REG_INDEX(R15)] = gregs.a5; + regs[REG_INDEX(R16)] = gregs.a6; + regs[REG_INDEX(R17)] = gregs.a7; + regs[REG_INDEX(R18)] = gregs.s2; + regs[REG_INDEX(R19)] = gregs.s3; + regs[REG_INDEX(R20)] = gregs.s4; + regs[REG_INDEX(R21)] = gregs.s5; + regs[REG_INDEX(R22)] = gregs.s6; + regs[REG_INDEX(R23)] = gregs.s7; + regs[REG_INDEX(R24)] = gregs.s8; + regs[REG_INDEX(R25)] = gregs.s9; + regs[REG_INDEX(R26)] = gregs.s10; + regs[REG_INDEX(R27)] = gregs.s11; + regs[REG_INDEX(R28)] = gregs.t3; + regs[REG_INDEX(R29)] = gregs.t4; + regs[REG_INDEX(R30)] = gregs.t5; + regs[REG_INDEX(R31)] = gregs.t6; + +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h index 9d7fda8a66b..ab092d4ee33 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c index d959c4f32a1..de5254d859e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java index 38e4c05bc33..9bff9ee9b15 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -36,7 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; -import sun.jvm.hotspot.debugger.MachineDescriptionRiscv64; +import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; @@ -593,14 +593,14 @@ private void setupDebuggerLinux() { machDesc = new MachineDescriptionPPC64(); } else if (cpu.equals("aarch64")) { machDesc = new MachineDescriptionAArch64(); - } else if (cpu.equals("riscv64")) { - machDesc = new MachineDescriptionRiscv64(); } else if (cpu.equals("sparc")) { if (LinuxDebuggerLocal.getAddressSize()==8) { machDesc = new MachineDescriptionSPARC64Bit(); } else { machDesc = new MachineDescriptionSPARC32Bit(); } + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); } else { try { machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java similarity index 90% rename from src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java index 0d10370a20d..a972516dee3 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRiscv64.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +25,7 @@ package sun.jvm.hotspot.debugger; -public class MachineDescriptionRiscv64 extends MachineDescriptionTwosComplement implements MachineDescription { +public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { public long getAddressSize() { return 8; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index 5e5a6bb7141..dc0bcb3da94 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; +import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; +import sun.jvm.hotspot.debugger.linux.riscv64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); - } else { + } else if (cpu.equals("riscv64")) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + if (pc == null) return null; + return new LinuxRISCV64CFrame(dbg, fp, pc); + } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 index 00000000000..f06da24bd0e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +public final class LinuxRISCV64CFrame extends BasicCFrame { + private static final int C_FRAME_LINK_OFFSET = -2; + private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; + + public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 index 00000000000..fdb841ccf3d --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.linux.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { + private LinuxDebugger debugger; + + public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 index 00000000000..96d5dee47ce --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcRISCV64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. 
+ this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcRISCV64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { + return false; + } + + return (((ProcRISCV64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java similarity index 56% rename from src/hotspot/cpu/riscv/registerMap_riscv.cpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java index ec644218fea..f2aa845e665 100644 --- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -21,25 +21,28 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
+ * */ -#include "precompiled.hpp" -#include "runtime/registerMap.hpp" -#include "vmreg_riscv.inline.hpp" +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { + private ProcDebugger debugger; + + public ProcRISCV64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } -address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { - if (base_reg->is_VectorRegister()) { - assert(base_reg->is_concrete(), "must pass base reg"); - int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / - VectorRegisterImpl::max_slots_per_register; - intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; - address base_location = location(base_reg); - if (base_location != NULL) { - return base_location + offset_in_bytes; - } else { - return NULL; + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); } - } else { - return location(base_reg->next(slot_idx)); - } } diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java similarity index 56% rename from src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java index f3077e0cff9..19f64b8ce2d 100644 --- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -1,6 +1,7 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -20,22 +21,26 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
+ * */ -#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +package sun.jvm.hotspot.debugger.proc.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; -#include +public class ProcRISCV64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; -// -// Support for building on older Linux systems -// + public ProcRISCV64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } -#ifndef SYS_memfd_create -#define SYS_memfd_create 279 -#endif -#ifndef SYS_fallocate -#define SYS_fallocate 47 -#endif + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcRISCV64Thread(debugger, threadIdentifierAddr); + } -#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + public ThreadProxy createThreadWrapper(long id) { + return new ProcRISCV64Thread(debugger, id); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 index 00000000000..aecbda59023 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteRISCV64Thread extends RemoteThread { + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 index 00000000000..1d3da6be5af --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java similarity index 55% rename from src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java index 8ecc2f06357..725b94e25a3 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -1,6 +1,7 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,21 +24,23 @@ * */ -#include "precompiled.hpp" -#if INCLUDE_ZGC -#include "gc/shared/barrierSetNMethod.hpp" -#include "utilities/debug.hpp" +package sun.jvm.hotspot.debugger.remote.riscv64; -void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { - ShouldNotReachHere(); -} +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; -void BarrierSetNMethod::disarm(nmethod* nm) { - ShouldNotReachHere(); -} +public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); + } -bool BarrierSetNMethod::is_armed(nmethod* nm) { - ShouldNotReachHere(); - return false; + public ThreadProxy createThreadWrapper(long id) { + return new RemoteRISCV64Thread(debugger, id); + } } -#endif \ No newline at end of file diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java new file mode 100644 index 00000000000..fb60a70427a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.riscv64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on riscv64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class RISCV64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. + + // /* + // * Signal context structure - contains all info to do with the state + // * before the signal handler was invoked. 
+ // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; + // union __riscv_fp_state sc_fpregs; + // }; + // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; + // unsigned long sp; + // unsigned long gp; + // unsigned long tp; + // unsigned long t0; + // unsigned long t1; + // unsigned long t2; + // unsigned long s0; + // unsigned long s1; + // unsigned long a0; + // unsigned long a1; + // unsigned long a2; + // unsigned long a3; + // unsigned long a4; + // unsigned long a5; + // unsigned long a6; + // unsigned long a7; + // unsigned long s2; + // unsigned long s3; + // unsigned long s4; + // unsigned long s5; + // unsigned long s6; + // unsigned long s7; + // unsigned long s8; + // unsigned long s9; + // unsigned long s10; + // unsigned long s11; + // unsigned long t3; + // unsigned long t4; + // unsigned long t5; + // unsigned long t6; + // }; + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + // One instance of the Native annotation is enough to trigger header generation + // for this file. + @Native + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int R29 = 29; + public static final int R30 = 30; + public static final int R31 = 31; + + public static final int NPRGREG = 32; + + public static final int PC = R0; + public static final int LR = R1; + public static final int SP = R2; + public static final int FP = R8; + + private long[] data; + + public RISCV64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} 
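[Editor's note, not part of the patch] For orientation: the 32 SA register indices defined above follow the order of the kernel's user_regs_struct, which is why LinuxDebuggerLocal.c can copy gregs into the context field by field and why PC, LR, SP and FP resolve to indices 0, 1, 2 and 8. The standalone sketch below is purely illustrative; the class and method names are made up and do not appear in the patch.

// Hypothetical helper, for illustration only -- not part of this patch.
// Spells out the index-to-ABI-name mapping that RISCV64ThreadContext and
// the gregs copy in LinuxDebuggerLocal.c both assume.
public class Riscv64RegNames {
    private static final String[] ABI_NAMES = {
        "pc", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
        "s0/fp", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
        "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
        "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"
    };

    public static String nameFor(int saIndex) {
        // Same 32-entry order as the Linux user_regs_struct.
        return ABI_NAMES[saIndex];
    }

    public static void main(String[] args) {
        // Mirrors the special indices in RISCV64ThreadContext: PC=R0, LR=R1, SP=R2, FP=R8.
        System.out.println("PC (index 0) -> " + nameFor(0));  // pc
        System.out.println("LR (index 1) -> " + nameFor(1));  // ra
        System.out.println("SP (index 2) -> " + nameFor(2));  // sp
        System.out.println("FP (index 8) -> " + nameFor(8));  // s0/fp
    }
}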
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index af711671f85..954c2e82605 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; @@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("riscv64")) { + access = new LinuxRISCV64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 index 00000000000..5c2b6e0e3ea --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_riscv64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new RISCV64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new RISCV64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new RISCV64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(RISCV64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 index 00000000000..34701c6922f --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.riscv64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.riscv64.*; + +/**

Should be able to be used on all riscv64 platforms we support + (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is an RISCV64ThreadContext; output is SP, FP, + and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is + left to the caller, since we may need to subclass RISCV64Frame to + support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at.
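(Note: in this port the caller, LinuxRISCV64JavaThreadPDAccess, passes + GUESS_SCAN_RANGE (128K) as the range argument.)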

*/ + +public class RISCV64CurrentFrameGuess { + private RISCV64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") + != null; + + public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new RISCV64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. 
Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + // We found a PC in the frame anchor. Check that it's plausible, and + // if it is, use it. + if (vm.isJavaPCDbg(pc)) { + setValues(sp, fp, pc); + } else { + setValues(sp, fp, null); + } + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct RISCV64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 index 00000000000..e372bc5f7be --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. + * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. */ + +public class RISCV64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; + } + + // Java frames + private static final int LINK_OFFSET = -2; + private static final int RETURN_ADDR_OFFSET = -1; + private static final int SENDER_SP_OFFSET = 0; + + // Interpreter frames + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_PADDING_OFFSET; + private static int INTERPRETER_FRAME_MIRROR_OFFSET; + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(8); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private RISCV64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if 
(pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public RISCV64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + + // We cannot assume SP[-1] always contains a valid return PC (e.g. if + // the callee is a C/C++ compiled frame). If the PC is not known to + // Java then this.pc is null. + Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + if (VM.getVM().isJavaPCDbg(savedPC)) { + this.pc = savedPC; + } + + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + RISCV64Frame frame = new RISCV64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof RISCV64Frame)) { + return false; + } + + RISCV64Frame other = (RISCV64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + RISCV64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. 
The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. 
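+ // savedFPAddr (senderSP - 2 words, computed above) is the slot where this frame's + // prologue stored the caller's fp; the return address sits one word above it.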
+ updateMapWithSavedLink(map, savedFPAddr); + } + + return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + 
return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java new file mode 100644 index 00000000000..850758a7ed4 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.utilities.*; + +public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public RISCV64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java new file mode 100644 index 00000000000..4aeb1c6f557 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.riscv64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class RISCV64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected RISCV64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + RISCV64RegisterMap retval = new RISCV64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index 948eabcab12..6552ce255fc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c index da9b6f34848..d0a6f4ea846 100644 --- a/src/utils/hsdis/hsdis.c +++ b/src/utils/hsdis/hsdis.c @@ -36,6 +36,7 @@ #include #include #include + #include "hsdis.h" #ifndef bool @@ -478,9 +479,6 @@ static const char* native_arch_name() { #endif #ifdef LIBARCH_s390x res = "s390:64-bit"; -#endif -#ifdef LIBARCH_riscv64 - res = "riscv:rv64"; #endif if (res == NULL) res = "architecture not set in Makefile!"; diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index 6cd261fca7c..7385ba25693 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -296,113 +296,3 @@ dragonwell runtime/coroutine/testJniDetachThreadHoldingMonitor.sh generic-all runtime/coroutine/testJniMonitorExit.sh generic-all gc/z/unloading/TestUnloadGarbageCollectorMXBean.java linux-aarch64 - -gc/stress/TestReclaimStringsLeaksMemory.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -resourcehogs/serviceability/sa/TestHeapDumpForLargeArray.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -runtime/cds/CdsDifferentCompactObjectHeaders.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/CDSJMapClstats.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbCDSCore.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbCDSJstackPrintAll.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbFindPC.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbInspect.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJdis.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJhisto.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-serviceability/sa/ClhsdbJstack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbJstackXcompStress.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbPrintAs.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbPstack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbSource.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbThread.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/ClhsdbWhere.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/DeadlockDetectionTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/JhsdbThreadInfoTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestClhsdbJstackLock.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestHeapDumpForInvokeDynamic.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJhsdbJstackLock.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJhsdbJstackMixed.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJmapCore.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/TestJmapCoreMetaspace.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -serviceability/sa/sadebugd/DebugdConnectTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/ConflictingDefaults_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/DefaultVsAbstract_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/MethodResolution_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 
-vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v49_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/PrivateMethods_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v49_syncstrict_invoke_redefine/TestDescription.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v50_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v51_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/StaticMethods_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_none_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_strict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_sync_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -vmTestbase/vm/runtime/defmeth/scenarios/SuperCall_v52_syncstrict_invoke_redefine/TestDescription.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java index 7805918c28a..823b9f39dbf 100644 --- a/test/hotspot/jtreg/compiler/c2/TestBit.java +++ b/test/hotspot/jtreg/compiler/c2/TestBit.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ * * @run driver compiler.c2.TestBit * - * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" + * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" * @requires vm.debug == true & vm.compiler2.enabled */ public class TestBit { @@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : - "amd64".equals(System.getProperty("os.arch")) ? "test" : null; + "amd64".equals(System.getProperty("os.arch")) ? "test" : + "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java index a2a8e93cc70..5a1b659bbe0 100644 --- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java @@ -29,7 +29,6 @@ * * @build sun.hotspot.WhiteBox * @run driver ClassFileInstaller sun.hotspot.WhiteBox - * sun.hotspot.WhiteBox$WhiteBoxPermission * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement * compiler.floatingpoint.TestLibmIntrinsics diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java index 10664da8464..55374b116e6 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java index 88be9ac1fca..8fb82ee4531 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java index a3ee9bb9f12..aca32137eda 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; @@ -55,7 +55,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java index 989345abc81..8deac4f7895 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,8 +40,8 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRiscv64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; +import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; @@ -54,7 +54,7 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedRiscv64CPU( + new GenericTestCaseForUnsupportedRISCV64CPU( SHAOptionsBase.USE_SHA_OPTION), new UseSHASpecificTestCaseForUnsupportedCPU( SHAOptionsBase.USE_SHA_OPTION), diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java index e95d331a980..26635002040 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,15 +32,15 @@ /** * Generic test case for SHA-related options targeted to any CPU except - * AArch64, Riscv64, PPC, S390x, SPARC and X86. + * AArch64, RISCV64, PPC, S390x, SPARC and X86. */ public class GenericTestCaseForOtherCPU extends SHAOptionsBase.TestCase { public GenericTestCaseForOtherCPU(String optionName) { - // Execute the test case on any CPU except AArch64, Riscv64, PPC, S390x, SPARC and X86. + // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. super(optionName, new NotPredicate( new OrPredicate(Platform::isAArch64, - new OrPredicate(Platform::isRiscv64, + new OrPredicate(Platform::isRISCV64, new OrPredicate(Platform::isS390x, new OrPredicate(Platform::isSparc, new OrPredicate(Platform::isPPC, @@ -52,7 +52,7 @@ public GenericTestCaseForOtherCPU(String optionName) { protected void verifyWarnings() throws Throwable { String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-Riscv64 usage of + // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of // SHA-related options will not cause any warnings. CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java similarity index 52% rename from test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java rename to test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java index d64eda4d512..8566d57c391 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRiscv64CPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,15 +32,24 @@ import jdk.test.lib.cli.predicate.NotPredicate; /** - * Generic test case for SHA-related options targeted to Riscv64 CPUs + * Generic test case for SHA-related options targeted to RISCV64 CPUs * which don't support instruction required by the tested option. 
*/ -public class GenericTestCaseForUnsupportedRiscv64CPU extends +public class GenericTestCaseForUnsupportedRISCV64CPU extends SHAOptionsBase.TestCase { - public GenericTestCaseForUnsupportedRiscv64CPU(String optionName) { - super(optionName, new AndPredicate(Platform::isRiscv64, + + final private boolean checkUseSHA; + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + this(optionName, true); + } + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, new NotPredicate(SHAOptionsBase.getPredicateForOption( optionName)))); + + this.checkUseSHA = checkUseSHA; } @Override @@ -54,22 +63,24 @@ protected void verifyWarnings() throws Throwable { SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, CommandLineOptionTest.prepareBooleanFlag(optionName, false)); - shouldPassMessage = String.format("If JVM is started with '-XX:-" - + "%s' '-XX:+%s', output should contain warning.", - SHAOptionsBase.USE_SHA_OPTION, optionName); + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", + SHAOptionsBase.USE_SHA_OPTION, optionName); - // Verify that when the tested option is enabled, then - // a warning will occur in VM output if UseSHA is disabled. - if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { - CommandLineOptionTest.verifySameJVMStartup( - new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, - null, - shouldPassMessage, - shouldPassMessage, - ExitCode.OK, - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. + if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( + new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } } } @@ -81,22 +92,24 @@ protected void verifyOptionValues() throws Throwable { optionName), SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); - // Verify that option is disabled even if it was explicitly enabled - // using CLI options. - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "Riscv64CPU even if set to true directly", optionName), - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled + // using CLI options. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - // Verify that option is disabled when +UseSHA was passed to JVM. 
- CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "Riscv64CPU even if %s flag set to JVM", - optionName, CommandLineOptionTest.prepareBooleanFlag( - SHAOptionsBase.USE_SHA_OPTION, true)), - SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag( - SHAOptionsBase.USE_SHA_OPTION, true)); + // Verify that option is disabled when +UseSHA was passed to JVM. + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( + SHAOptionsBase.USE_SHA_OPTION, true)); + } } } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java index 2e3e2717a65..7be8af6d035 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java index 0e06a9e4327..797927b42bf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java index c3cdbf37464..be8f7d586c2 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java index d33bd411f16..d96d5e29c00 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java index 992fa4b5161..b09c873d05d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java index 3e79b3528b7..fe40ed6f98d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test - * @requires os.arch=="aarch64" + * @requires os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java index 6603dd224ef..51631910493 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java index d9a0c988004..d999ae423cf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java index 722db95aed3..65912a5c7fa 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java index f58f21feb23..fffdc2f7565 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test - * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" + * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java index c5e38ba72e7..2c866f26f08 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -24,7 +24,7 @@ /* @test * @bug 8167409 - * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ package compiler.runtime.criticalnatives.argumentcorruption; diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java index 4437367b69a..1da369fde23 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java @@ -24,7 +24,7 @@ /* @test * @bug 8167408 - * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp */ package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index 284b51019cf..7afe3560f30 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ 
b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java index f7c6f11c1f1..d4d43b01ae6 100644 --- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. - if (Platform.isWindows() || Platform.isARM() || Platform.isRiscv64()) { + if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { return; // we are done } output.reportDiagnosticSummary(); diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java index 8677b97b29d..eab19273ad8 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -239,8 +239,8 @@ private static boolean isAlwaysSupportedPlatform() { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86())) || - Platform.isOSX() || Platform.isRiscv64() || + Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java index 604bfb678ac..9d80d7f0a3d 100644 --- a/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java +++ b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java @@ -30,6 +30,7 @@ * should fail when loading. * @requires vm.cds * @requires vm.bits == 64 + * @requires os.arch != "riscv64" * @library /test/lib * @run driver CdsDifferentCompactObjectHeaders */ diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh index 7f3698c47cf..0c300d4fd96 100644 --- a/test/hotspot/jtreg/test_env.sh +++ b/test/hotspot/jtreg/test_env.sh @@ -185,11 +185,6 @@ if [ $? = 0 ] then VM_CPU="arm" fi -grep "riscv64" vm_version.out > ${NULL} -if [ $? = 0 ] -then - VM_CPU="riscv64" -fi grep "ppc" vm_version.out > ${NULL} if [ $? 
= 0 ] then diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java index 19db9f6c29d..d4bfe31dd7a 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isRiscv64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java index b23e520bcf8..cb3348a0f5b 100644 --- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java +++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java @@ -63,13 +63,13 @@ public static int run(String argv[], PrintStream ref) { try { t_a.join(); } catch (InterruptedException e) {} - checkInfo(t_a, t_a.getThreadGroup(), 1); thrinfo001b t_b = new thrinfo001b(); t_b.setPriority(Thread.MIN_PRIORITY); t_b.setDaemon(true); checkInfo(t_b, t_b.getThreadGroup(), 2); t_b.start(); + checkInfo(t_b, t_b.getThreadGroup(), 2); try { t_b.join(); } catch (InterruptedException e) {} diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt index e7a0f6b70d8..3a3f73dd0ea 100644 --- a/test/jdk/ProblemList.txt +++ b/test/jdk/ProblemList.txt @@ -892,38 +892,3 @@ java/net/MulticastSocket/UnreferencedMulticastSockets.java https://github.com/dr java/net/SocketOption/OptionsTest.java https://github.com/dragonwell-project/dragonwell11/issues/209 linux-riscv64 java/nio/MappedByteBuffer/Truncate.java https://github.com/dragonwell-project/dragonwell11/issues/209 linux-riscv64 - -com/alibaba/rcm/TestDeadLoopKillObjectMonitor.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestExceptionPreidicate.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestKillThreads.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRCMInheritanceCallBack.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmCpu.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmRoot.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestRcmUpdate.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -com/alibaba/rcm/TestStressedKillThreads.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/lang/invoke/PrivateInterfaceCall.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/lang/invoke/VarHandles/VarHandleTestAccessShort.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyGet.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyPost.java 
https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/net/httpclient/ShortResponseBodyPostWithRetry.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/util/concurrent/ConcurrentHashMap/ConcurrentContainsKeyTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -java/util/stream/test/org/openjdk/tests/java/util/stream/InfiniteStreamWithLimitOpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4529616/AccessibleJTableCellTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4670319/AccessibleJTreePCESourceTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/4715503/AccessibleJTableCellBoundingRectangleTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/8283015/AccessibleJTableCellNameTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/AccessibilityProvider/basic.sh https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/JScrollPaneAccessibleRelationsTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/accessibility/JTable/BooleanRendererHasAccessibleActionTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/imageio/ReadAbortTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -javax/imageio/WriteAbortTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/AddTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector128ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector256ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/Vector64ConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/VectorMaxConversionTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -jdk/incubator/vector/VectorReshapeTests.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/security/tools/jarsigner/Warning.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/BasicLauncherTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/HeapDumpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -sun/tools/jhsdb/JShellHeapDumpTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java index a976e1a6c19..abeff80e5e8 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Riscv64"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Riscv64"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } diff --git a/test/langtools/ProblemList.txt b/test/langtools/ProblemList.txt index 0f91307930c..9ad2b728dcd 100644 --- a/test/langtools/ProblemList.txt +++ b/test/langtools/ProblemList.txt @@ -77,6 +77,3 @@ tools/sjavac/ClasspathDependencies.java 8158002 generic-all Requires i # # jdeps -# hotspot riscv -jdk/jshell/JdiHangingLaunchExecutionControlTest.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 -tools/javac/failover/CheckAttributedTree.java https://github.com/dragonwell-project/dragonwell11/issues/770 linux-riscv64 diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java index e9875f321a5..b310873d1b6 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -202,7 +202,7 @@ public static boolean isARM() { return isArch("arm.*"); } - public static boolean isRiscv64() { + public static boolean isRISCV64() { return isArch("riscv64"); }