From cfb4d7bcf7c0ec9370a90f0b858b435c57b8edeb Mon Sep 17 00:00:00 2001 From: zifeihan Date: Wed, 21 Feb 2024 16:35:51 +0800 Subject: [PATCH] 8319900: Recursive lightweight locking: riscv64 implementation --- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 8 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 443 ++++++++++++++---- .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 4 +- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 - .../cpu/riscv/macroAssembler_riscv.cpp | 167 ++++--- .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- src/hotspot/cpu/riscv/riscv.ad | 38 +- src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 6 +- src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 + 9 files changed, 492 insertions(+), 199 deletions(-) diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 2961b1a91ceab..256da017d4924 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -69,13 +69,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr bnez(temp, slow_case, true /* is_far */); } - // Load object header - ld(hdr, Address(obj, hdr_offset)); - if (LockingMode == LM_LIGHTWEIGHT) { lightweight_lock(obj, hdr, temp, t1, slow_case); } else if (LockingMode == LM_LEGACY) { Label done; + // Load object header + ld(hdr, Address(obj, hdr_offset)); // and mark it as unlocked ori(hdr, hdr, markWord::unlocked_value); // save unlocked object header into the displaced header location on the stack @@ -134,9 +133,6 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ verify_oop(obj); if (LockingMode == LM_LIGHTWEIGHT) { - ld(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); - test_bit(temp, hdr, exact_log2(markWord::monitor_value)); - bnez(temp, slow_case, /* is_far */ true); lightweight_unlock(obj, hdr, temp, t1, slow_case); } else if (LockingMode == LM_LEGACY) { // test if object header is pointing to the displaced header, and if so, restore diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 9670bc987a304..a87415d6b5388 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -32,6 +32,7 @@ #include "opto/output.hpp" #include "opto/subnode.hpp" #include "runtime/stubRoutines.hpp" +#include "utilities/globalDefinitions.hpp" #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -51,30 +52,35 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register box = boxReg; Register disp_hdr = tmp1Reg; Register tmp = tmp2Reg; - Label cont; Label object_has_monitor; - Label count, no_count; + // Finish fast lock successfully. MUST branch to with flag == 0 + Label locked; + // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0 + Label slow_path; + assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight"); assert_different_registers(oop, box, tmp, disp_hdr, flag, tmp3Reg, t0); + mv(flag, 1); + // Load markWord from object into displaced_header. 
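+ // It is used below to test for an existing monitor and, under LM_LEGACY, to build the expected value for the stack-lock CAS.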
ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(flag, oop); - lwu(flag, Address(flag, Klass::access_flags_offset())); - test_bit(flag, flag, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); - bnez(flag, cont, true /* is_far */); + load_klass(tmp, oop); + lwu(tmp, Address(tmp, Klass::access_flags_offset())); + test_bit(tmp, tmp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); + bnez(tmp, slow_path); } // Check for existing monitor - test_bit(t0, disp_hdr, exact_log2(markWord::monitor_value)); - bnez(t0, object_has_monitor); + test_bit(tmp, disp_hdr, exact_log2(markWord::monitor_value)); + bnez(tmp, object_has_monitor); if (LockingMode == LM_MONITOR) { - mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path - j(cont); - } else if (LockingMode == LM_LEGACY) { + j(slow_path); + } else { + assert(LockingMode == LM_LEGACY, "must be"); // Set tmp to be (markWord of object | UNLOCK_VALUE). ori(tmp, disp_hdr, markWord::unlocked_value); @@ -84,10 +90,9 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, // Compare object markWord with an unlocked value (tmp) and if // equal exchange the stack address of our box with object markWord. // On failure disp_hdr contains the possibly locked markWord. - cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/disp_hdr); - mv(flag, zr); - beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas + cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, + Assembler::aq, Assembler::rl, /*result*/disp_hdr); + beq(disp_hdr, tmp, locked); assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); @@ -104,19 +109,8 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, // recursive lock. andr(tmp/*==0?*/, disp_hdr, tmp); sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); - mv(flag, tmp); // we can use the value of tmp as the result here - j(cont); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, ""); - Label slow; - lightweight_lock(oop, disp_hdr, tmp, tmp3Reg, slow); - - // Indicate success on completion. - mv(flag, zr); - j(count); - bind(slow); - mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path - j(no_count); + beqz(tmp, locked); + j(slow_path); } // Handle existing monitor. @@ -126,35 +120,41 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, // // Try to CAS m->owner from null to current thread. add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value)); - cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - - if (LockingMode != LM_LIGHTWEIGHT) { - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for - // markWord::monitor_value so use markWord::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
- mv(tmp, (address)markWord::unused_mark().value());
- sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
- }
+ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64,
+ Assembler::aq, Assembler::rl, /*result*/tmp3Reg); // cas succeeds if tmp3Reg == zr(expected)
- beqz(flag, cont); // CAS success means locking succeeded
+ // Store a non-null value into the box to avoid looking like a re-entrant
+ // lock. The fast-path monitor unlock code checks for
+ // markWord::monitor_value so use markWord::unused_mark which has the
+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
+ mv(tmp, (address)markWord::unused_mark().value());
+ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
- bne(flag, xthread, cont); // Check for recursive locking
+ beqz(tmp3Reg, locked); // CAS success means locking succeeded
+
+ bne(tmp3Reg, xthread, slow_path); // Check for recursive locking
 // Recursive lock case
- mv(flag, zr);
 increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1, t0, tmp);
- bind(cont);
- // zero flag indicates success
- // non-zero flag indicates failure
- bnez(flag, no_count);
-
- bind(count);
+ bind(locked);
+ mv(flag, zr);
 increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp);
- bind(no_count);
+#ifdef ASSERT
+ // Check that locked label is reached with flags == 0.
+ Label flag_correct;
+ beqz(flag, flag_correct);
+ stop("Fast Lock Flag != 0");
+#endif
+ bind(slow_path);
+#ifdef ASSERT
+ // Check that slow_path label is reached with flags != 0.
+ bnez(flag, flag_correct);
+ stop("Fast Lock Flag == 0");
+ bind(flag_correct);
+#endif
+ // C2 uses the value of flags (0 vs !0) to determine the continuation.
 }

 void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
@@ -165,19 +165,23 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
 Register box = boxReg;
 Register disp_hdr = tmp1Reg;
 Register tmp = tmp2Reg;
- Label cont;
 Label object_has_monitor;
- Label count, no_count;
+ // Finish fast unlock successfully. MUST branch to with flag == 0
+ Label unlocked;
+ // Finish fast unlock unsuccessfully. slow_path MUST branch to with flag != 0
+ Label slow_path;
+
+ assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
 assert_different_registers(oop, box, tmp, disp_hdr, flag, t0);

+ mv(flag, 1);
+
 if (LockingMode == LM_LEGACY) {
 // Find the lock address and load the displaced header from the stack.
 ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

 // If the displaced header is 0, we have a recursive unlock.
- mv(flag, disp_hdr);
- beqz(disp_hdr, cont);
+ beqz(disp_hdr, unlocked);
 }

 // Handle existing monitor.
@@ -186,28 +190,17 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
 bnez(t0, object_has_monitor);

 if (LockingMode == LM_MONITOR) {
- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path
- j(cont);
- } else if (LockingMode == LM_LEGACY) {
+ j(slow_path);
+ } else {
+ assert(LockingMode == LM_LEGACY, "must be");
 // Check if it is still a light weight lock, this is true if we
 // see the stack address of the basicLock in the markWord of the
 // object. 
- cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed,
- Assembler::rl, /*result*/tmp);
- xorr(flag, box, tmp); // box == tmp if cas succeeds
- j(cont);
- } else {
- assert(LockingMode == LM_LIGHTWEIGHT, "");
- Label slow;
- lightweight_unlock(oop, tmp, box, disp_hdr, slow);
-
- // Indicate success on completion.
- mv(flag, zr);
- j(count);
- bind(slow);
- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path
- j(no_count);
+ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64,
+ Assembler::relaxed, Assembler::rl, /*result*/tmp);
+ beq(box, tmp, unlocked); // box == tmp if cas succeeds
+ j(slow_path);
 }

 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
@@ -217,17 +210,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
 STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
 add(tmp, tmp, -(int)markWord::monitor_value); // monitor

- if (LockingMode == LM_LIGHTWEIGHT) {
- // If the owner is anonymous, we need to fix it -- in an outline stub.
- Register tmp2 = disp_hdr;
- ld(tmp2, Address(tmp, ObjectMonitor::owner_offset()));
- test_bit(t0, tmp2, exact_log2(ObjectMonitor::ANONYMOUS_OWNER));
- C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2);
- Compile::current()->output()->add_stub(stub);
- bnez(t0, stub->entry(), /* is_far */ true);
- bind(stub->continuation());
- }
-
 ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset()));

 Label notRecursive;
@@ -236,28 +218,301 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
 // Recursive lock
 addi(disp_hdr, disp_hdr, -1);
 sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset()));
- mv(flag, zr);
- j(cont);
+ j(unlocked);

 bind(notRecursive);
- ld(flag, Address(tmp, ObjectMonitor::EntryList_offset()));
+ ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
 ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
- orr(flag, flag, disp_hdr); // Will be 0 if both are 0.
- bnez(flag, cont);
+ orr(t0, t0, disp_hdr); // Will be 0 if both are 0.
+ bnez(t0, slow_path);
+
+ // need a release store here
 la(tmp, Address(tmp, ObjectMonitor::owner_offset()));
 membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
 sd(zr, Address(tmp)); // set unowned

- bind(cont);
- // zero flag indicates success
- // non-zero flag indicates failure
- bnez(flag, no_count);
-
- bind(count);
+ bind(unlocked);
+ mv(flag, zr);
 decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp);

- bind(no_count);
+#ifdef ASSERT
+ // Check that unlocked label is reached with flags == 0.
+ Label flag_correct;
+ beqz(flag, flag_correct);
+ stop("Fast Unlock Flag != 0");
+#endif
+ bind(slow_path);
+#ifdef ASSERT
+ // Check that slow_path label is reached with flags != 0.
+ bnez(flag, flag_correct);
+ stop("Fast Unlock Flag == 0");
+ bind(flag_correct);
+#endif
+ // C2 uses the value of flags (0 vs !0) to determine the continuation.
+}
+
+void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Register tmp2, Register tmp3) {
+ // Flag register, zero for success; non-zero for failure.
+ Register flag = t1;
+
+ assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+ assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
+
+ mv(flag, 1);
+
+ // Handle inflated monitor.
+ Label inflated;
+ // Finish fast lock successfully. 
MUST branch to with flag == 0 + Label locked; + // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0 + Label slow_path; + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp1, obj); + lwu(tmp1, Address(tmp1, Klass::access_flags_offset())); + test_bit(tmp1, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); + bnez(tmp1, slow_path); + } + + const Register tmp1_mark = tmp1; + + { // Lightweight locking + + // Push lock to the lock stack and finish successfully. MUST branch to with flag == 0 + Label push; + + const Register tmp2_top = tmp2; + const Register tmp3_t = tmp3; + + // Check if lock-stack is full. + lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(tmp3_t, (unsigned)LockStack::end_offset()); + bge(tmp2_top, tmp3_t, slow_path); + + // Check if recursive. + add(tmp3_t, xthread, tmp2_top); + ld(tmp3_t, Address(tmp3_t, -oopSize)); + beq(obj, tmp3_t, push); + + // Relaxed normal load to check for monitor. Optimization for monitor case. + ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); + test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value)); + bnez(tmp3_t, inflated); + + // Not inflated + assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea"); + + // Try to lock. Transition lock-bits 0b01 => 0b00 + ori(tmp1_mark, tmp1_mark, markWord::unlocked_value); + xori(tmp3_t, tmp1_mark, markWord::unlocked_value); + cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_t); + bne(tmp1_mark, tmp3_t, slow_path); + + bind(push); + // After successful lock, push object on lock-stack. + add(tmp3_t, xthread, tmp2_top); + sd(obj, Address(tmp3_t)); + addw(tmp2_top, tmp2_top, oopSize); + sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); + j(locked); + } + + { // Handle inflated monitor. + bind(inflated); + + // mark contains the tagged ObjectMonitor*. + const Register tmp1_tagged_monitor = tmp1_mark; + const uintptr_t monitor_tag = markWord::monitor_value; + const Register tmp2_owner_addr = tmp2; + const Register tmp3_owner = tmp3; + + // Compute owner address. + la(tmp2_owner_addr, Address(tmp1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag))); + + // CAS owner (null => current thread). + cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner); + beqz(tmp3_owner, locked); + + // Check if recursive. + bne(tmp3_owner, xthread, slow_path); + + // Recursive. + increment(Address(tmp1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, t0, tmp3); + } + + bind(locked); + mv(flag, zr); + increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp3); + +#ifdef ASSERT + // Check that locked label is reached with flags == 0. + Label flag_correct; + beqz(flag, flag_correct); + stop("Fast Lock Flag != 0"); +#endif + bind(slow_path); +#ifdef ASSERT + // Check that slow_path label is reached with flags != 0. + bnez(flag, flag_correct); + stop("Fast Lock Flag == 0"); + bind(flag_correct); +#endif + // C2 uses the value of flags (0 vs !0) to determine the continuation. +} + +void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Register tmp2, + Register tmp3) { + // Flag register, zero for success; non-zero for failure. 
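+ // t1 is the conventional 'flag' register bridging to the BoolNode consumers (see the riscv.ad comment above cmpFastLock).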
+ Register flag = t1;
+
+ assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+ assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
+
+ mv(flag, 1);
+
+ // Handle inflated monitor.
+ Label inflated, inflated_load_monitor;
+ // Finish fast unlock successfully. unlocked MUST branch to with flag == 0
+ Label unlocked;
+ // Finish fast unlock unsuccessfully. MUST branch to with flag != 0
+ Label slow_path;
+
+ const Register tmp1_mark = tmp1;
+ const Register tmp2_top = tmp2;
+ const Register tmp3_t = tmp3;
+
+ { // Lightweight unlock
+
+ // Check if obj is top of lock-stack.
+ lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
+ subw(tmp2_top, tmp2_top, oopSize);
+ add(tmp3_t, xthread, tmp2_top);
+ ld(tmp3_t, Address(tmp3_t));
+ // Top of lock stack was not obj. Must be monitor.
+ bne(obj, tmp3_t, inflated_load_monitor);
+
+ // Pop lock-stack.
+ DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
+ DEBUG_ONLY(sd(zr, Address(tmp3_t));)
+ sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+ // Check if recursive.
+ add(tmp3_t, xthread, tmp2_top);
+ ld(tmp3_t, Address(tmp3_t, -oopSize));
+ beq(obj, tmp3_t, unlocked);
+
+ // Not recursive.
+ // Load Mark.
+ ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+ // Check header for monitor (0b10).
+ test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
+ bnez(tmp3_t, inflated);
+
+ // Try to unlock. Transition lock bits 0b00 => 0b01
+ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
+ ori(tmp3_t, tmp1_mark, markWord::unlocked_value);
+ cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64,
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ tmp3_t);
+ beq(tmp1_mark, tmp3_t, unlocked);
+
+ // Compare and exchange failed.
+ // Restore lock-stack and handle the unlock in runtime.
+ DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
+ DEBUG_ONLY(sd(obj, Address(tmp3_t));)
+ addw(tmp2_top, tmp2_top, oopSize);
+ sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
+ j(slow_path);
+ }
+
+ { // Handle inflated monitor.
+ bind(inflated_load_monitor);
+ ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+#ifdef ASSERT
+ test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
+ bnez(tmp3_t, inflated);
+ stop("Fast Unlock not monitor");
+#endif
+
+ bind(inflated);
+
+#ifdef ASSERT
+ Label check_done;
+ subw(tmp2_top, tmp2_top, oopSize);
+ mv(tmp3_t, in_bytes(JavaThread::lock_stack_base_offset()));
+ blt(tmp2_top, tmp3_t, check_done);
+ add(tmp3_t, xthread, tmp2_top);
+ ld(tmp3_t, Address(tmp3_t));
+ bne(obj, tmp3_t, inflated);
+ stop("Fast Unlock lock on stack");
+ bind(check_done);
+#endif
+
+ // mark contains the tagged ObjectMonitor*.
+ const Register tmp1_monitor = tmp1_mark;
+ const uintptr_t monitor_tag = markWord::monitor_value;
+
+ // Untag the monitor.
+ sub(tmp1_monitor, tmp1_mark, monitor_tag);
+
+ const Register tmp2_recursions = tmp2;
+ Label not_recursive;
+
+ // Check if recursive.
+ ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
+ beqz(tmp2_recursions, not_recursive);
+
+ // Recursive unlock.
+ sub(tmp2_recursions, tmp2_recursions, 1u);
+ sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
+ j(unlocked);
+
+ bind(not_recursive);
+
+ Label release;
+ const Register tmp2_owner_addr = tmp2;
+
+ // Compute owner address. 
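+ // tmp1_monitor already holds the untagged monitor pointer (see the sub above), so unlike
+ // fast_lock_lightweight no -monitor_tag adjustment is needed here.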
+ la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
+
+ // Check if the entry lists are empty.
+ ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
+ ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
+ orr(t0, t0, tmp3_t);
+ beqz(t0, release);
+
+ // The owner may be anonymous and we removed the last obj entry in
+ // the lock-stack. This loses the information about the owner.
+ // Write the thread to the owner field so the runtime knows the owner.
+ sd(xthread, Address(tmp2_owner_addr));
+ j(slow_path);
+
+ bind(release);
+ // Set owner to null.
+ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+ sd(zr, Address(tmp2_owner_addr));
+ }
+
+ bind(unlocked);
+ mv(flag, zr);
+ decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3);
+
+#ifdef ASSERT
+ // Check that unlocked label is reached with flags == 0.
+ Label flag_correct;
+ beqz(flag, flag_correct);
+ stop("Fast Unlock Flag != 0");
+#endif
+ bind(slow_path);
+#ifdef ASSERT
+ // Check that slow_path label is reached with flags != 0.
+ bnez(flag, flag_correct);
+ stop("Fast Unlock Flag == 0");
+ bind(flag_correct);
+#endif
+ // C2 uses the value of flags (0 vs !0) to determine the continuation.
 }

 // short string
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 9fe4dc002c992..25ba66387f18a 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -44,9 +44,11 @@
 public:
 // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
- // See full description in macroAssembler_riscv.cpp.
 void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
 void fast_unlock(Register object, Register box, Register tmp1, Register tmp2);
+ // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
+ void fast_lock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
+ void fast_unlock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);

 void string_compare(Register str1, Register str2,
 Register cnt1, Register cnt2, Register result,
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 19d665bd421d0..497918e6c05c6 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -763,7 +763,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
 }

 if (LockingMode == LM_LIGHTWEIGHT) {
- ld(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
 j(count);
 } else if (LockingMode == LM_LEGACY) {
@@ -860,24 +859,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg)

 if (LockingMode == LM_LIGHTWEIGHT) {
 Label slow_case;
-
- // Check for non-symmetric locking. This is allowed by the spec and the interpreter
- // must handle it.
- Register tmp1 = t0;
- Register tmp2 = header_reg;
- // First check for lock-stack underflow.
- lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
- mv(tmp2, (unsigned)LockStack::start_offset());
- ble(tmp1, tmp2, slow_case);
- // Then check if the top of the lock-stack matches the unlocked object. 
- subw(tmp1, tmp1, oopSize); - add(tmp1, xthread, tmp1); - ld(tmp1, Address(tmp1, 0)); - bne(tmp1, obj_reg, slow_case); - - ld(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - test_bit(t0, header_reg, exact_log2(markWord::monitor_value)); - bnez(t0, slow_case); lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case); j(count); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 96e07319e843f..d79ee3950bbd7 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -49,6 +49,7 @@ #include "runtime/jniHandles.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/powerOfTwo.hpp" #ifdef COMPILER2 #include "opto/compile.hpp" @@ -5042,98 +5043,124 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { } // Implements lightweight-locking. -// Branches to slow upon failure to lock the object. -// Falls through upon success. // // - obj: the object to be locked -// - hdr: the header, already loaded from obj, will be destroyed -// - tmp1, tmp2: temporary registers, will be destroyed -void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { +// - tmp1, tmp2, tmp3: temporary registers, will be destroyed +// - slow: branched to if locking fails +void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) { assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); - assert_different_registers(obj, hdr, tmp1, tmp2, t0); - - // Check if we would have space on lock-stack for the object. - lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); - mv(tmp2, (unsigned)LockStack::end_offset()); - bge(tmp1, tmp2, slow, /* is_far */ true); - - // Load (object->mark() | 1) into hdr - ori(hdr, hdr, markWord::unlocked_value); - // Clear lock-bits, into tmp2 - xori(tmp2, hdr, markWord::unlocked_value); - - // Try to swing header from unlocked to locked - Label success; - cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow); - bind(success); - - // After successful lock, push object on lock-stack - lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); - add(tmp2, xthread, tmp1); - sd(obj, Address(tmp2, 0)); - addw(tmp1, tmp1, oopSize); - sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + assert_different_registers(obj, tmp1, tmp2, tmp3, t0); + + Label push; + const Register top = tmp1; + const Register mark = tmp2; + const Register t = tmp3; + + // Preload the markWord. It is important that this is the first + // instruction emitted as it is part of C1's null check semantics. + ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + + // Check if the lock-stack is full. + lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(t, (unsigned)LockStack::end_offset()); + bge(top, t, slow, /* is_far */ true); + + // Check for recursion. + add(t, xthread, top); + ld(t, Address(t, -oopSize)); + beq(obj, t, push); + + // Check header for monitor (0b10). + test_bit(t, mark, exact_log2(markWord::monitor_value)); + bnez(t, slow, /* is_far */ true); + + // Try to lock. 
Transition lock bits 0b01 => 0b00
+ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+ ori(mark, mark, markWord::unlocked_value);
+ xori(t, mark, markWord::unlocked_value);
+ cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64,
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t);
+ bne(mark, t, slow, /* is_far */ true);
+
+ bind(push);
+ // After successful lock, push object on lock-stack.
+ add(t, xthread, top);
+ sd(obj, Address(t));
+ addw(top, top, oopSize);
+ sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
 }

 // Implements lightweight-unlocking.
-// Branches to slow upon failure.
-// Falls through upon success.
 //
 // - obj: the object to be unlocked
-// - hdr: the (pre-loaded) header of the object
-// - tmp1, tmp2: temporary registers
-void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
+// - tmp1, tmp2, tmp3: temporary registers
+// - slow: branched to if unlocking fails
+void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
 assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
- assert_different_registers(obj, hdr, tmp1, tmp2, t0);
+ assert_different_registers(obj, tmp1, tmp2, tmp3, t0);

#ifdef ASSERT
 {
- // The following checks rely on the fact that LockStack is only ever modified by
- // its owning thread, even if the lock got inflated concurrently; removal of LockStack
- // entries after inflation will happen delayed in that case.
-
 // Check for lock-stack underflow.
 Label stack_ok;
 lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset()));
 mv(tmp2, (unsigned)LockStack::start_offset());
- bgt(tmp1, tmp2, stack_ok);
+ bge(tmp1, tmp2, stack_ok);
 STOP("Lock-stack underflow");
 bind(stack_ok);
 }
- {
- // Check if the top of the lock-stack matches the unlocked object.
- Label tos_ok;
- subw(tmp1, tmp1, oopSize);
- add(tmp1, xthread, tmp1);
- ld(tmp1, Address(tmp1, 0));
- beq(tmp1, obj, tos_ok);
- STOP("Top of lock-stack does not match the unlocked object");
- bind(tos_ok);
- }
- {
- // Check that hdr is fast-locked.
- Label hdr_ok;
- andi(tmp1, hdr, markWord::lock_mask_in_place);
- beqz(tmp1, hdr_ok);
- STOP("Header is not fast-locked");
- bind(hdr_ok);
- }
#endif

- // Load the new header (unlocked) into tmp1
- ori(tmp1, hdr, markWord::unlocked_value);
+ Label unlocked, push_and_slow;
+ const Register top = tmp1;
+ const Register mark = tmp2;
+ const Register t = tmp3;
+
+ // Check if obj is top of lock-stack.
+ lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+ subw(top, top, oopSize);
+ add(t, xthread, top);
+ ld(t, Address(t));
+ bne(obj, t, slow, /* is_far */ true);
+
+ // Pop lock-stack.
+ DEBUG_ONLY(add(t, xthread, top);)
+ DEBUG_ONLY(sd(zr, Address(t));)
+ sw(top, Address(xthread, JavaThread::lock_stack_top_offset()));
+
+ // Check if recursive.
+ add(t, xthread, top);
+ ld(t, Address(t, -oopSize));
+ beq(obj, t, unlocked);
+
+ // Not recursive. Check header for monitor (0b10). 
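+ // A set monitor bit means the lock was inflated while the oop sat on the lock-stack;
+ // restore the stack and let the runtime handle the unlock.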
+ ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + test_bit(t, mark, exact_log2(markWord::monitor_value)); + bnez(t, push_and_slow); - // Try to swing header from locked to unlocked - Label success; - cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); - bind(success); - - // After successful unlock, pop object from lock-stack - lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); - subw(tmp1, tmp1, oopSize); #ifdef ASSERT - add(tmp2, xthread, tmp1); - sd(zr, Address(tmp2, 0)); + // Check header not unlocked (0b01). + Label not_unlocked; + test_bit(t, mark, exact_log2(markWord::unlocked_value)); + beqz(t, not_unlocked); + stop("lightweight_unlock already unlocked"); + bind(not_unlocked); #endif - sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + + // Try to unlock. Transition lock bits 0b00 => 0b01 + assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); + ori(t, mark, markWord::unlocked_value); + cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t); + beq(mark, t, unlocked); + + bind(push_and_slow); + // Restore lock-stack and handle the unlock in runtime. + DEBUG_ONLY(add(t, xthread, top);) + DEBUG_ONLY(sd(obj, Address(t));) + addw(top, top, oopSize); + sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); + j(slow); + + bind(unlocked); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 63cfb22855180..75c0f9ba30596 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1519,8 +1519,8 @@ class MacroAssembler: public Assembler { void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); public: - void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); - void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); + void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); + void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); }; #ifdef ASSERT diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 10a80cd094024..86d0ad7a05929 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -10469,10 +10469,11 @@ instruct tlsLoadP(javaThread_RegP dst) // using t1 as the 'flag' register to bridge the BoolNode producers and consumers instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastLock object box)); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); - ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); + ins_cost(10 * DEFAULT_COST); format %{ "fastlock $object,$box\t! 
kills $tmp1,$tmp2,$tmp3, #@cmpFastLock" %} ins_encode %{ @@ -10485,10 +10486,11 @@ instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iReg // using t1 as the 'flag' register to bridge the BoolNode producers and consumers instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastUnlock object box)); effect(TEMP tmp1, TEMP tmp2); - ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); + ins_cost(10 * DEFAULT_COST); format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} ins_encode %{ @@ -10498,6 +10500,38 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iR ins_pipe(pipe_serial); %} +instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastLock object box)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL box); + + ins_cost(10 * DEFAULT_COST); + format %{ "fastlock $object,$box\t! kills $box,$tmp1,$tmp2 #@cmpFastLockLightweight" %} + + ins_encode %{ + __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); + %} + + ins_pipe(pipe_serial); +%} + +instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastUnlock object box)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL box); + + ins_cost(10 * DEFAULT_COST); + format %{ "fastunlock $object,$box\t! kills $box,$tmp1,$tmp2, #@cmpFastUnlockLightweight" %} + + ins_encode %{ + __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); + %} + + ins_pipe(pipe_serial); +%} + // Tail Call; Jump from runtime stub to Java code. // Also known as an 'interprocedural jump'. // Target of jump will eventually return to caller. 
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 7435b552d15de..5945f9d5fe2ab 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -1679,8 +1679,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(swap_reg, Address(lock_reg, mark_word_offset)); __ bnez(swap_reg, slow_path_lock); } else { - assert(LockingMode == LM_LIGHTWEIGHT, ""); - __ ld(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); } @@ -1806,9 +1805,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); } else { assert(LockingMode == LM_LIGHTWEIGHT, ""); - __ ld(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ test_bit(t0, old_hdr, exact_log2(markWord::monitor_value)); - __ bnez(t0, slow_path_unlock); __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock); __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); } diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index de85fb166f834..5ae2c77d7ff21 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -217,6 +217,8 @@ class VM_Version : public Abstract_VM_Version { constexpr static bool supports_stack_watermark_barrier() { return true; } + constexpr static bool supports_recursive_lightweight_locking() { return true; } + static bool supports_on_spin_wait() { return UseZihintpause; } // RISCV64 supports fast class initialization checks
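
For readers who want the algorithm without the RISC-V details: the fast paths above implement, in essence, the scheme below. This is a minimal, self-contained C++ model under simplified assumptions (a fixed-capacity lock-stack, a bare atomic mark word, no concurrent inflation or safepoints, and the inflated-monitor paths elided to the slow path); all type and function names here are illustrative stand-ins, not HotSpot code.

    // Illustrative model only -- simplified stand-ins for markWord and the
    // per-thread lock-stack; not HotSpot code.
    #include <atomic>
    #include <cassert>
    #include <cstdint>

    static constexpr uintptr_t unlocked_value = 0b01; // markWord::unlocked_value
    static constexpr uintptr_t monitor_value  = 0b10; // markWord::monitor_value

    struct Obj {
      std::atomic<uintptr_t> mark{unlocked_value};
    };

    struct LockStackModel {     // models the JavaThread lock-stack
      static constexpr int capacity = 8;
      Obj* entries[capacity] = {};
      int top = 0;              // next free slot, like lock_stack_top_offset()

      bool full() const { return top == capacity; }
      bool top_is(Obj* o) const { return top > 0 && entries[top - 1] == o; }
      void push(Obj* o) { entries[top++] = o; }
      void pop() { entries[--top] = nullptr; }
    };

    // Mirrors fast_lock_lightweight; returns true where the assembly sets flag == 0.
    bool fast_lock(LockStackModel& ls, Obj* o) {
      if (ls.full()) return false;                    // lock-stack full -> slow path
      if (ls.top_is(o)) { ls.push(o); return true; }  // recursive: push again, no CAS
      uintptr_t mark = o->mark.load(std::memory_order_relaxed);
      if (mark & monitor_value) return false;         // inflated (real code CASes the owner)
      uintptr_t expected = mark | unlocked_value;     // ori(...)
      uintptr_t desired  = expected ^ unlocked_value; // xori(...): 0b01 => 0b00
      if (!o->mark.compare_exchange_strong(expected, desired, std::memory_order_acquire))
        return false;                                 // contended -> slow path
      ls.push(o);
      return true;
    }

    // Mirrors fast_unlock_lightweight.
    bool fast_unlock(LockStackModel& ls, Obj* o) {
      if (!ls.top_is(o)) return false;                // not top-of-stack: monitor path
      ls.pop();
      if (ls.top_is(o)) return true;                  // recursive: pop only
      uintptr_t mark = o->mark.load(std::memory_order_relaxed);
      if (mark & monitor_value) { ls.push(o); return false; } // inflated meanwhile
      uintptr_t desired = mark | unlocked_value;      // 0b00 => 0b01
      if (!o->mark.compare_exchange_strong(mark, desired, std::memory_order_release)) {
        ls.push(o);                                   // restore, let the runtime unlock
        return false;
      }
      return true;
    }

    int main() {
      LockStackModel ls;
      Obj o;
      assert(fast_lock(ls, &o) && fast_lock(ls, &o));     // recursive fast lock
      assert(fast_unlock(ls, &o) && fast_unlock(ls, &o)); // recursive fast unlock
      assert(o.mark.load() == unlocked_value);
    }

The point the model makes explicit is the recursive case that this patch adds: a nested enter or exit only pushes or pops the thread-local lock-stack, and the mark word is CASed solely on the outermost lock (0b01 => 0b00) and unlock (0b00 => 0b01) transitions, which is what the ld(t, Address(t, -oopSize)); beq(obj, t, ...) sequences in the assembly check.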