From 6f77021223962f96c53af06fd26b635e815ad9a0 Mon Sep 17 00:00:00 2001
From: jimmyk
Date: Sat, 27 Jul 2024 15:04:19 -0400
Subject: [PATCH] Accelerate Unsafe CAS Intrinsics on Power and X

Adds support for the following recognized methods:
  CompareAndExchangeObject    //JDK11
  CompareAndExchangeReference //JDK17,21
  CompareAndExchangeInt       //JDK11,17,21
  CompareAndExchangeLong      //JDK11,17,21

As with their CompareAndSet counterparts, the JIT acceleration does not
support CAE on static fields, so the createUnsafeCASCallDiamond function
was updated to also handle the CAE methods. The accelerated CAE code was
built on top of the existing accelerated CAS support on both Power and X.

Removed the recognized CAS enums from isUnsafePut. They are not Unsafe
put methods, and those cases could never be triggered.

Even before my changes, setting both TR_UseOldCompareAndSwapObject and
TR_DisableCASInlining would cause a crash on X. It doesn't really make
sense to set both at the same time, but I fixed it anyway since the fix
was quick.

VMwrtbarWithoutStoreEvaluator is used for several different opcodes,
including arraycopy, ArrayStoreCHK, writeBarrier and Unsafe CAS calls.
The path in question is only used for Unsafe CAS calls that store an
object. To differentiate Unsafe CAS calls from the other cases, there is
a check for the node being an icall. This check is no longer valid with
the introduction of support for compareAndExchangeReference, where the
node is an acall, so the check is changed to a generic call check. There
is also a check for the call being to a recognized method; it now checks
for these specific recognized methods:
  sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z
  jdk_internal_misc_Unsafe_compareAndExchangeObject
  jdk_internal_misc_Unsafe_compareAndExchangeReference

Signed-off-by: jimmyk
---
 runtime/compiler/codegen/J9CodeGenerator.cpp |   8 +-
 .../codegen/J9RecognizedMethodsEnum.hpp      |   8 +-
 runtime/compiler/env/j9method.cpp            |  21 +-
 .../compiler/optimizer/InlinerTempForJ9.cpp  |  49 +++-
 .../compiler/p/codegen/J9TreeEvaluator.cpp   | 258 +++++++++++++-----
 .../compiler/x/codegen/J9TreeEvaluator.cpp   | 171 +++++++++---
 6 files changed, 393 insertions(+), 122 deletions(-)

diff --git a/runtime/compiler/codegen/J9CodeGenerator.cpp b/runtime/compiler/codegen/J9CodeGenerator.cpp
index 690514efe24..98be8fe206e 100644
--- a/runtime/compiler/codegen/J9CodeGenerator.cpp
+++ b/runtime/compiler/codegen/J9CodeGenerator.cpp
@@ -655,13 +655,17 @@ J9::CodeGenerator::lowerTreesPreChildrenVisit(TR::Node *parent, TR::TreeTop *tre
    // Hiding compressedref logic from CodeGen doesn't seem a good practise, the evaluator always need the uncompressedref node for write barrier,
    // therefore, this part is deprecated. It'll be removed once P and Z update their corresponding evaluators.
static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject"); - if (self()->comp()->useCompressedPointers() && (UseOldCompareAndSwapObject || !(self()->comp()->target().cpu.isX86() || self()->comp()->target().cpu.isARM64()))) + static bool disableCASInlining = feGetEnv("TR_DisableCASInlining") != NULL; + if (self()->comp()->useCompressedPointers() && ((UseOldCompareAndSwapObject && (self()->comp()->target().cpu.isARM64() || !disableCASInlining)) || !(self()->comp()->target().cpu.isX86() || self()->comp()->target().cpu.isARM64()))) { TR::MethodSymbol *methodSymbol = parent->getSymbol()->castToMethodSymbol(); + static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL; // In Java9 Unsafe could be the jdk.internal JNI method or the sun.misc ordinary method wrapper, // while in Java8 it can only be the sun.misc package which will itself contain the JNI method. // Test for isNative to distinguish between them. - if ((methodSymbol->getRecognizedMethod() == TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z) && + if (((methodSymbol->getRecognizedMethod() == TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z) || + ((methodSymbol->getRecognizedMethod() == TR::jdk_internal_misc_Unsafe_compareAndExchangeObject) && !disableCAEIntrinsic) || + ((methodSymbol->getRecognizedMethod() == TR::jdk_internal_misc_Unsafe_compareAndExchangeReference) && !disableCAEIntrinsic)) && methodSymbol->isNative() && (!TR::Compiler->om.canGenerateArraylets() || parent->isUnsafeGetPutCASCallOnNonArray()) && parent->isSafeForCGToFastPathUnsafeCall()) { diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 40e23b64997..237105ee7cf 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -362,9 +362,6 @@ sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z, sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z, sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z, - sun_misc_Unsafe_compareAndExchangeInt_jlObjectJII_Z, - sun_misc_Unsafe_compareAndExchangeLong_jlObjectJJJ_Z, - sun_misc_Unsafe_compareAndExchangeObject_jlObjectJjlObjectjlObject_Z, sun_misc_Unsafe_putBoolean_jlObjectJZ_V, sun_misc_Unsafe_putByte_jlObjectJB_V, @@ -459,6 +456,11 @@ sun_misc_Unsafe_allocateInstance, sun_misc_Unsafe_allocateUninitializedArray0, + jdk_internal_misc_Unsafe_compareAndExchangeInt, + jdk_internal_misc_Unsafe_compareAndExchangeLong, + jdk_internal_misc_Unsafe_compareAndExchangeObject, + jdk_internal_misc_Unsafe_compareAndExchangeReference, + jdk_internal_misc_Unsafe_copyMemory0, jdk_internal_loader_NativeLibraries_load, jdk_internal_util_ArraysSupport_vectorizedMismatch, diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index 69ab0248bda..a5306edf78d 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -2975,10 +2975,10 @@ void TR_ResolvedJ9Method::construct() {x(TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z, "compareAndSetObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z")}, {x(TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z, "compareAndSetReference", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z")}, - {x(TR::sun_misc_Unsafe_compareAndExchangeInt_jlObjectJII_Z, "compareAndExchangeInt", "(Ljava/lang/Object;JII)I")}, - 
{x(TR::sun_misc_Unsafe_compareAndExchangeLong_jlObjectJJJ_Z, "compareAndExchangeLong", "(Ljava/lang/Object;JJJ)J")}, - {x(TR::sun_misc_Unsafe_compareAndExchangeObject_jlObjectJjlObjectjlObject_Z, "compareAndExchangeObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")}, - {x(TR::sun_misc_Unsafe_compareAndExchangeObject_jlObjectJjlObjectjlObject_Z, "compareAndExchangeReference", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")}, + {x(TR::jdk_internal_misc_Unsafe_compareAndExchangeInt, "compareAndExchangeInt", "(Ljava/lang/Object;JII)I")}, + {x(TR::jdk_internal_misc_Unsafe_compareAndExchangeLong, "compareAndExchangeLong", "(Ljava/lang/Object;JJJ)J")}, + {x(TR::jdk_internal_misc_Unsafe_compareAndExchangeObject, "compareAndExchangeObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")}, + {x(TR::jdk_internal_misc_Unsafe_compareAndExchangeReference, "compareAndExchangeReference", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")}, {x(TR::sun_misc_Unsafe_staticFieldBase, "staticFieldBase", "(Ljava/lang/reflect/Field;)Ljava/lang/Object")}, {x(TR::sun_misc_Unsafe_staticFieldOffset, "staticFieldOffset", "(Ljava/lang/reflect/Field;)J")}, @@ -4981,6 +4981,11 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm) // from bool TR::TreeEvaluator::VMinlineCallEvaluator(TR::Node *node, bool isIndirect, TR::CodeGenerator *cg) //case TR::sun_misc_Unsafe_copyMemory: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + case TR::sun_misc_Unsafe_loadFence: case TR::sun_misc_Unsafe_storeFence: case TR::sun_misc_Unsafe_fullFence: @@ -5524,6 +5529,11 @@ TR_J9MethodBase::isUnsafeCAS(TR::Compilation * c) TR::RecognizedMethod rm = getRecognizedMethod(); switch (rm) { + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + return (c->target().cpu.isPower() || c->target().cpu.isX86()); case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z: case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z: @@ -5814,9 +5824,6 @@ TR_J9MethodBase::isUnsafePut(TR::RecognizedMethod rm) { switch (rm) { - case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: - case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z: - case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z: case TR::sun_misc_Unsafe_getAndAddInt: case TR::sun_misc_Unsafe_getAndAddLong: case TR::sun_misc_Unsafe_getAndSetInt: diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index f15332181ce..92053b82bd8 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -403,18 +403,41 @@ TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::No case TR::java_nio_ByteOrder_nativeOrder: return true; - // In Java9 the following enum values match both sun.misc.Unsafe and - // jdk.internal.misc.Unsafe The sun.misc.Unsafe methods are simple - // wrappers to call jdk.internal impls, and we want to inline them. 
Since - // the same code can run with Java8 classes where sun.misc.Unsafe has the - // JNI impl, we need to differentiate by testing with isNative(). If it is - // native, then we don't need to inline it as it will be handled - // elsewhere. + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86()) + { + return false; + } + break; + + /* In Java9 the compareAndSwap[Int|Long|Object] and copyMemory enums match + * both sun.misc.Unsafe and jdk.internal.misc.Unsafe. The sun.misc.Unsafe + * methods are simple wrappers to call jdk.internal impls, and we want to + * inline them. Since the same code can run with Java8 classes where + * sun.misc.Unsafe has the JNI impl, we need to differentiate by testing + * with isNative(). If it is native, then we don't need to inline it as it + * will be handled elsewhere. + * + * Starting from Java12, compareAndExchangeObject was changed from being a + * native to being a simple wrapper to call compareAndExchangeReference. + * The enum matches both cases and we only want to force inlining on the + * non-native case. If the native case reaches here, it means it already + * failed the isInlineableJNI check and should not be force inlined. + */ + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86()) + { + return !calleeMethod->isNative(); + } + break; case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z: case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z: case TR::sun_misc_Unsafe_copyMemory: return !calleeMethod->isNative(); + default: break; } @@ -1704,7 +1727,7 @@ TR_J9InlinerPolicy::createUnsafeMonitorOp(TR::ResolvedMethodSymbol *calleeSymbol } bool -TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR::Node *callNode) +TR_J9InlinerPolicy::createUnsafeCASCallDiamond(TR::TreeTop *callNodeTreeTop, TR::Node *callNode) { // This method is used to create an if diamond around a call to any of the unsafe compare and swap methods // Codegens have a fast path for the compare and swaps, but cannot deal with the case where the offset value passed in to a the CAS is low tagged @@ -2450,6 +2473,7 @@ TR_J9InlinerPolicy::inlineUnsafeCall(TR::ResolvedMethodSymbol *calleeSymbol, TR: !comp()->fej9()->traceableMethodsCanBeInlined())) return false; + static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL; // I am not sure if having the same type between C/S and B/Z matters here.. ie. if the type is being used as the only distinguishing factor switch (callNode->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod()) { @@ -2615,6 +2639,15 @@ TR_J9InlinerPolicy::inlineUnsafeCall(TR::ResolvedMethodSymbol *calleeSymbol, TR: case TR::sun_misc_Unsafe_objectFieldOffset: return false; // todo + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + if (disableCAEIntrinsic || !(comp()->target().cpu.isPower() || comp()->target().cpu.isX86())) + { + break; + } + // Fallthrough if previous if condition is not met. 
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z: case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z: diff --git a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp index 5497a3edf1f..8a284d94ce6 100644 --- a/runtime/compiler/p/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/p/codegen/J9TreeEvaluator.cpp @@ -8067,10 +8067,39 @@ void J9::Power::TreeEvaluator::genWrtbarForArrayCopy(TR::Node *node, TR::Registe } static TR::Register *genCAS(TR::Node *node, TR::CodeGenerator *cg, TR::Register *objReg, TR::Register *offsetReg, TR::Register *oldVReg, TR::Register *newVReg, TR::Register *cndReg, - TR::LabelSymbol *doneLabel, TR::Node *objNode, int32_t oldValue, bool oldValueInReg, bool isLong, bool casWithoutSync = false) + TR::LabelSymbol *doneLabel, TR::Node *objNode, int32_t oldValue, bool oldValueInReg, int32_t dataSize, bool isReference, bool isExchange, bool casWithoutSync) { - TR::Register *resultReg = cg->allocateRegister(); - TR::Instruction *gcPoint; + TR::Compilation *comp = cg->comp(); + TR::Register *resultReg; + + if (isReference && isExchange) + { + resultReg = cg->allocateCollectedReferenceRegister(); + } + else + { + resultReg = cg->allocateRegister(); + } + + TR::InstOpCode::Mnemonic reservedLoadOpCode, conditionalStoreOpCode, compareOpCode, compareImmOpCode; + switch (dataSize) + { + case 4: + reservedLoadOpCode = TR::InstOpCode::lwarx; + conditionalStoreOpCode = TR::InstOpCode::stwcx_r; + compareOpCode = TR::InstOpCode::cmp4; + compareImmOpCode = TR::InstOpCode::cmpi4; + break; + case 8: + reservedLoadOpCode = TR::InstOpCode::ldarx; + conditionalStoreOpCode = TR::InstOpCode::stdcx_r; + compareOpCode = TR::InstOpCode::cmp8; + compareImmOpCode = TR::InstOpCode::cmpi8; + break; + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "Unknown dataSize: %d\n", dataSize); + break; + } // Memory barrier --- NOTE: we should be able to do a test upfront to save this barrier, // but Hursley advised to be conservative due to lack of specification. @@ -8079,75 +8108,107 @@ static TR::Register *genCAS(TR::Node *node, TR::CodeGenerator *cg, TR::Register TR::LabelSymbol *loopLabel = generateLabelSymbol(cg); generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel); - generateTrg1MemInstruction(cg, isLong ? TR::InstOpCode::ldarx : TR::InstOpCode::lwarx, node, resultReg, TR::MemoryReference::createWithIndexReg(cg, objReg, offsetReg, isLong ? 8 : 4)); + generateTrg1MemInstruction(cg, reservedLoadOpCode, node, resultReg, TR::MemoryReference::createWithIndexReg(cg, objReg, offsetReg, dataSize)); if (oldValueInReg) - generateTrg1Src2Instruction(cg, isLong ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmp4, node, cndReg, resultReg, oldVReg); + generateTrg1Src2Instruction(cg, compareOpCode, node, cndReg, resultReg, oldVReg); else - generateTrg1Src1ImmInstruction(cg, isLong ? 
TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, cndReg, resultReg, oldValue); - generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, resultReg, 0); + generateTrg1Src1ImmInstruction(cg, compareImmOpCode, node, cndReg, resultReg, oldValue); + + if (!isExchange) + { + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, resultReg, 0); + } + else if (isReference && comp->target().is64Bit() && comp->useCompressedPointers()) + { + genDecompressPointer(cg, node, resultReg); + } // We don't know how the compare will fare such that we don't dictate the prediction generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, doneLabel, cndReg); - generateMemSrc1Instruction(cg, isLong ? TR::InstOpCode::stdcx_r : TR::InstOpCode::stwcx_r, node, TR::MemoryReference::createWithIndexReg(cg, objReg, offsetReg, isLong ? 8 : 4), newVReg); + generateMemSrc1Instruction(cg, conditionalStoreOpCode, node, TR::MemoryReference::createWithIndexReg(cg, objReg, offsetReg, dataSize), newVReg); // We expect this store is usually successful, i.e., the following branch will not be taken generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, PPCOpProp_BranchUnlikely, node, loopLabel, cndReg); // We deviate from the VM helper here: no-store-no-barrier instead of always-barrier if (!casWithoutSync) generateInstruction(cg, TR::InstOpCode::sync, node); - generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, resultReg, 1); + + if (!isExchange) + { + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, resultReg, 1); + } node->setRegister(resultReg); return resultReg; } -static TR::Register *VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong) +static TR::Register *VMinlineCompareAndSetOrExchange(TR::Node *node, TR::CodeGenerator *cg, int32_t dataSize, bool isExchange) { TR::Compilation * comp = cg->comp(); TR::Register *objReg, *offsetReg, *oldVReg, *newVReg, *resultReg, *cndReg; - TR::Node *firstChild, *secondChild, *thirdChild, *fourthChild, *fifthChild; + TR::Node *firstChild, *objNode, *offsetNode, *oldVNode, *newVNode; TR::RegisterDependencyConditions *conditions; - TR::LabelSymbol *doneLabel; - intptr_t offsetValue, oldValue; + TR::LabelSymbol *startLabel, *doneLabel; + int64_t offsetValue, oldValue; bool oldValueInReg = true, freeOffsetReg = false; TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe()); firstChild = node->getFirstChild(); - secondChild = node->getSecondChild(); - thirdChild = node->getChild(2); - fourthChild = node->getChild(3); - fifthChild = node->getChild(4); - objReg = cg->evaluate(secondChild); + objNode = node->getSecondChild(); + offsetNode = node->getChild(2); + oldVNode = node->getChild(3); + newVNode = node->getChild(4); + + objReg = cg->evaluate(objNode); // VM helper chops off the value in 32bit, and we don't want the whole long value either - if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL && comp->target().is32Bit()) + if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister() == NULL && comp->target().is32Bit()) { - offsetValue = thirdChild->getLongInt(); + offsetValue = offsetNode->getLongInt(); offsetReg = cg->allocateRegister(); loadConstant(cg, node, (int32_t) offsetValue, offsetReg); freeOffsetReg = true; } else { - offsetReg = cg->evaluate(thirdChild); + offsetReg = cg->evaluate(offsetNode); + freeOffsetReg = false; + + // Assume that the offset is positive and not pathologically large (i.e., > 2^31). 
if (comp->target().is32Bit()) offsetReg = offsetReg->getLowOrder(); } - if (fourthChild->getOpCode().isLoadConst() && fourthChild->getRegister() == NULL) + if (oldVNode->getOpCode().isLoadConst() && oldVNode->getRegister() == NULL) { - if (isLong) - oldValue = fourthChild->getLongInt(); - else - oldValue = fourthChild->getInt(); + switch (dataSize) + { + case 4: + oldValue = oldVNode->getInt(); + break; + case 8: + oldValue = oldVNode->getLongInt(); + break; + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "Unknown dataSize: %d\n", dataSize); + break; + } + if (oldValue >= LOWER_IMMED && oldValue <= UPPER_IMMED) + { oldValueInReg = false; + } } + if (oldValueInReg) - oldVReg = cg->evaluate(fourthChild); - newVReg = cg->evaluate(fifthChild); + { + oldVReg = cg->evaluate(oldVNode); + } + + newVReg = cg->evaluate(newVNode); cndReg = cg->allocateRegister(TR_CCR); + startLabel = generateLabelSymbol(cg); doneLabel = generateLabelSymbol(cg); bool casWithoutSync = false; @@ -8163,7 +8224,10 @@ static TR::Register *VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *c } } - resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, secondChild, oldValue, oldValueInReg, isLong, casWithoutSync); + generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel); + startLabel->setStartInternalControlFlow(); + + resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, objNode, oldValue, oldValueInReg, dataSize, false, isExchange, casWithoutSync); conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(6, 6, cg->trMemory()); TR::addDependency(conditions, objReg, TR::RealRegister::NoReg, TR_GPR, cg); @@ -8176,35 +8240,46 @@ static TR::Register *VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *c TR::addDependency(conditions, cndReg, TR::RealRegister::cr0, TR_CCR, cg); generateDepLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions); + doneLabel->setEndInternalControlFlow(); cg->stopUsingRegister(cndReg); cg->recursivelyDecReferenceCount(firstChild); - cg->decReferenceCount(secondChild); + cg->decReferenceCount(objNode); + if (freeOffsetReg) { cg->stopUsingRegister(offsetReg); - cg->recursivelyDecReferenceCount(thirdChild); + cg->recursivelyDecReferenceCount(offsetNode); } else - cg->decReferenceCount(thirdChild); + { + cg->decReferenceCount(offsetNode); + } + if (oldValueInReg) - cg->decReferenceCount(fourthChild); + { + cg->decReferenceCount(oldVNode); + } else - cg->recursivelyDecReferenceCount(fourthChild); - cg->decReferenceCount(fifthChild); + { + cg->recursivelyDecReferenceCount(oldVNode); + } + + cg->decReferenceCount(newVNode); + return resultReg; } -static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenerator *cg) +static TR::Register *VMinlineCompareAndSetOrExchangeReference(TR::Node *node, TR::CodeGenerator *cg, bool isExchange) { TR::Compilation *comp = cg->comp(); TR_J9VMBase *fej9 = (TR_J9VMBase *) (comp->fe()); TR::Register *objReg, *offsetReg, *oldVReg, *newVReg, *resultReg, *cndReg; - TR::Node *firstChild, *secondChild, *thirdChild, *fourthChild, *fifthChild; + TR::Node *firstChild, *objNode, *offsetNode, *oldVNode, *newVNode; TR::RegisterDependencyConditions *conditions; TR::LabelSymbol *doneLabel, *storeLabel, *wrtBarEndLabel; intptr_t offsetValue; - bool freeOffsetReg = false; + bool freeOffsetReg; bool needDup = false; auto gcMode = TR::Compiler->om.writeBarrierType(); @@ -8213,42 +8288,46 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node 
*node, TR::CodeGenera bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental); firstChild = node->getFirstChild(); - secondChild = node->getSecondChild(); - thirdChild = node->getChild(2); - fourthChild = node->getChild(3); - fifthChild = node->getChild(4); - objReg = cg->evaluate(secondChild); + objNode = node->getSecondChild(); + offsetNode = node->getChild(2); + oldVNode = node->getChild(3); + newVNode = node->getChild(4); + + objReg = cg->evaluate(objNode); // VM helper chops off the value in 32bit, and we don't want the whole long value either - if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL && comp->target().is32Bit()) + if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister() == NULL && comp->target().is32Bit()) { - offsetValue = thirdChild->getLongInt(); + offsetValue = offsetNode->getLongInt(); offsetReg = cg->allocateRegister(); loadConstant(cg, node, (int32_t) offsetValue, offsetReg); freeOffsetReg = true; } else { - offsetReg = cg->evaluate(thirdChild); + offsetReg = cg->evaluate(offsetNode); + freeOffsetReg = false; + + /* Assume that the offset is positive and not pathologically large (i.e., > 2^31). */ if (comp->target().is32Bit()) offsetReg = offsetReg->getLowOrder(); } - oldVReg = cg->evaluate(fourthChild); + oldVReg = cg->evaluate(oldVNode); - TR::Node *translatedNode = fifthChild; + TR::Node *translatedNode = newVNode; bool bumpedRefCount = false; - if (comp->useCompressedPointers() && (fifthChild->getDataType() != TR::Address)) + if (comp->useCompressedPointers() && (newVNode->getDataType() != TR::Address)) { bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0); - translatedNode = fifthChild; + translatedNode = newVNode; if (translatedNode->getOpCode().isConversion()) translatedNode = translatedNode->getFirstChild(); if (translatedNode->getOpCode().isRightShift()) // optional translatedNode = translatedNode->getFirstChild(); - translatedNode = fifthChild; + translatedNode = newVNode; if (useShiftedOffsets) { while ((translatedNode->getNumChildren() > 0) && (translatedNode->getOpCodeValue() != TR::a2l)) @@ -8264,7 +8343,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera } } - newVReg = cg->evaluate(fifthChild); + newVReg = cg->evaluate(newVNode); if (objReg == newVReg) { newVReg = cg->allocateCollectedReferenceRegister(); @@ -8354,7 +8433,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera #endif //OMR_GC_CONCURRENT_SCAVENGER if (!comp->getOptions()->realTimeGC()) - resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, secondChild, 0, true, (comp->target().is64Bit() && !comp->useCompressedPointers())); + resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, objNode, 0, true, TR::Compiler->om.sizeofReferenceField(), true, isExchange, false); uint32_t numDeps = (doWrtBar || doCrdMrk) ? 
13 : 11; @@ -8368,7 +8447,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera TR::Register *temp1Reg = cg->allocateRegister(), *temp2Reg = cg->allocateRegister(), *temp3Reg, *temp4Reg = cg->allocateRegister(); TR::addDependency(conditions, objReg, TR::RealRegister::gr3, TR_GPR, cg); TR::Register *wrtbarSrcReg; - if (translatedNode != fifthChild) + if (translatedNode != newVNode) { TR::addDependency(conditions, newVReg, TR::RealRegister::NoReg, TR_GPR, cg); TR::addDependency(conditions, translatedNode->getRegister(), TR::RealRegister::gr4, TR_GPR, cg); @@ -8395,7 +8474,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera TR::addDependency(conditions, temp3Reg, TR::RealRegister::NoReg, TR_GPR, cg); } - if (!fifthChild->isNonNull()) + if (!newVNode->isNonNull()) { generateTrg1Src1ImmInstruction(cg,TR::InstOpCode::Op_cmpi, node, cndReg, newVReg, NULLVALUE); generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, node, doneLabel, cndReg); @@ -8446,7 +8525,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera TR::addDependency(conditions, offsetReg, TR::RealRegister::NoReg, TR_GPR, cg); } - if (!fifthChild->isNonNull()) + if (!newVNode->isNonNull()) { generateTrg1Src1ImmInstruction(cg,TR::InstOpCode::Op_cmpi, node, cndReg, newVReg, NULLVALUE); generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, node, wrtBarEndLabel, cndReg); @@ -8514,7 +8593,7 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera generateLabelInstruction(cg, TR::InstOpCode::label, node, storeLabel); if (comp->getOptions()->realTimeGC()) - resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, secondChild, 0, true, (comp->target().is64Bit() && !comp->useCompressedPointers())); + resultReg = genCAS(node, cg, objReg, offsetReg, oldVReg, newVReg, cndReg, doneLabel, objNode, 0, true, TR::Compiler->om.sizeofReferenceField(), true, isExchange, false); TR::addDependency(conditions, resultReg, TR::RealRegister::NoReg, TR_GPR, cg); if (oldVReg != newVReg && oldVReg != objReg) @@ -8525,18 +8604,24 @@ static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenera if (needDup) cg->stopUsingRegister(newVReg); + cg->stopUsingRegister(cndReg); cg->recursivelyDecReferenceCount(firstChild); - cg->decReferenceCount(secondChild); + cg->decReferenceCount(objNode); + if (freeOffsetReg) { cg->stopUsingRegister(offsetReg); - cg->recursivelyDecReferenceCount(thirdChild); + cg->recursivelyDecReferenceCount(offsetNode); } else - cg->decReferenceCount(thirdChild); - cg->decReferenceCount(fourthChild); - cg->decReferenceCount(fifthChild); + { + cg->decReferenceCount(offsetNode); + } + + cg->decReferenceCount(oldVNode); + cg->decReferenceCount(newVNode); + if (bumpedRefCount) cg->decReferenceCount(translatedNode); @@ -11894,6 +11979,7 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result } else if (methodSymbol) { + static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL; switch (methodSymbol->getRecognizedMethod()) { case TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer: @@ -12196,7 +12282,7 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) { - resultReg = VMinlineCompareAndSwap(node, cg, false); + resultReg = VMinlineCompareAndSetOrExchange(node, 
cg, 4, false); return true; } break; @@ -12210,7 +12296,7 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result if (comp->target().is64Bit() && (node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) { - resultReg = VMinlineCompareAndSwap(node, cg, true); + resultReg = VMinlineCompareAndSetOrExchange(node, cg, 8, false); return true; } else if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) @@ -12227,11 +12313,53 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) { - resultReg = VMinlineCompareAndSwapObject(node, cg); + resultReg = VMinlineCompareAndSetOrExchangeReference(node, cg, false); return true; } break; + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) + { + if (!disableCAEIntrinsic) + { + resultReg = VMinlineCompareAndSetOrExchange(node, cg, 4, true); + return true; + } + } + break; + + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + if (comp->target().is64Bit() && (node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) + { + if (!disableCAEIntrinsic) + { + resultReg = VMinlineCompareAndSetOrExchange(node, cg, 8, true); + return true; + } + } + break; + + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + /* + * Starting from Java 12, compareAndExchangeObject was changed from a native call to a + * Java wrapper calling compareAndExchangeReference. + * We only want to inline the JNI native method, so add an explicit test for isNative(). + */ + if (!methodSymbol->isNative()) + break; + /* If native, fall through. */ + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall()) + { + if (!disableCAEIntrinsic) + { + resultReg = VMinlineCompareAndSetOrExchangeReference(node, cg, true); + return true; + } + } + break; + case TR::java_nio_Bits_keepAlive: case TR::java_lang_ref_Reference_reachabilityFence: { diff --git a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp index d693313f514..d842b0aaf68 100644 --- a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp @@ -9514,7 +9514,7 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c * The Code Generator * */ -static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGenerator* cg) +static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGenerator* cg, bool isExchange) { TR::Compilation *comp = cg->comp(); @@ -9530,7 +9530,7 @@ static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGe TR::Register* offset = cg->evaluate(offsetNode); TR::Register* oldValue = cg->evaluate(oldValueNode); TR::Register* newValue = cg->evaluate(newValueNode); - TR::Register* result = cg->allocateRegister(); + TR::Register* result = isExchange ? 
nullptr : cg->allocateRegister(); TR::Register* EAX = cg->allocateRegister(); TR::Register* tmp = cg->allocateRegister(); @@ -9610,8 +9610,21 @@ static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGe deps->addPreCondition(EAX, TR::RealRegister::eax, cg); deps->addPostCondition(EAX, TR::RealRegister::eax, cg); generateMemRegInstruction(use64BitClasses ? TR::InstOpCode::LCMPXCHG8MemReg : TR::InstOpCode::LCMPXCHG4MemReg, node, generateX86MemoryReference(object, offset, 0, cg), tmp, deps, cg); - generateRegInstruction(TR::InstOpCode::SETE1Reg, node, result, cg); - generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg); + + if (isExchange) + { + result = EAX; + result->setContainsCollectedReference(); + if (TR::Compiler->om.compressedReferenceShiftOffset() != 0) + { + generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, EAX, TR::Compiler->om.compressedReferenceShiftOffset(), cg); + } + } + else + { + generateRegInstruction(TR::InstOpCode::SETE1Reg, node, result, cg); + generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg); + } // Non-realtime: Generate a write barrier for this kind of object. // @@ -9633,7 +9646,10 @@ static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGe } cg->stopUsingRegister(tmp); - cg->stopUsingRegister(EAX); + if (!isExchange) + { + cg->stopUsingRegister(EAX); + } node->setRegister(result); for (int32_t i = 1; i < node->getNumChildren(); i++) { @@ -9653,6 +9669,7 @@ inlineCompareAndSwapNative( TR::Node *node, int8_t size, bool isObject, + bool isExchange, TR::CodeGenerator *cg) { TR::Node *firstChild = node->getFirstChild(); @@ -9680,20 +9697,28 @@ inlineCompareAndSwapNative( // // Do this early so we can return early without additional evaluations. // - if (size == 4) + switch (size) { - op = TR::InstOpCode::LCMPXCHG4MemReg; - } - else if (size == 8 && comp->target().is64Bit()) - { - op = TR::InstOpCode::LCMPXCHG8MemReg; - } - else - { - if (!comp->target().cpu.supportsFeature(OMR_FEATURE_X86_CX8)) + case 4: + op = TR::InstOpCode::LCMPXCHG4MemReg; + break; + case 8: + if (comp->target().is64Bit()) + { + op = TR::InstOpCode::LCMPXCHG8MemReg; + } + else if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_CX8)) + { + op = TR::InstOpCode::LCMPXCHG8BMem; + } + else + { + return false; + } + break; + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "Unknown dataSize: %d\n", size); return false; - - op = TR::InstOpCode::LCMPXCHG8BMem; } // In Java9 the sun.misc.Unsafe JNI methods have been moved to jdk.internal, @@ -9728,7 +9753,6 @@ inlineCompareAndSwapNative( if (comp->target().is32Bit()) offsetReg = offsetReg->getLowOrder(); } - cg->decReferenceCount(offsetChild); TR::MemoryReference *mr; @@ -9770,10 +9794,20 @@ inlineCompareAndSwapNative( TR::Register *newValueRegister = cg->evaluate(newValueChild); - TR::Register *oldValueRegister = (size == 8) ? - cg->longClobberEvaluate(oldValueChild) : cg->intClobberEvaluate(oldValueChild); + TR::Register *oldValueRegister; + switch (size) + { + case 4: + oldValueRegister = cg->intClobberEvaluate(oldValueChild); + break; + case 8: + oldValueRegister = cg->longClobberEvaluate(oldValueChild); + break; + default: + TR_ASSERT_FATAL_WITH_NODE(node, false, "Unknown dataSize: %d\n", size); + break; + } bool killOldValueRegister = (oldValueChild->getReferenceCount() > 1) ? 
true : false; - cg->decReferenceCount(oldValueChild); TR::RegisterDependencyConditions *deps; TR_X86ScratchRegisterManager *scratchRegisterManagerForRealTime = NULL; @@ -9815,6 +9849,7 @@ inlineCompareAndSwapNative( TR::MemoryReference *cmpxchgMR = mr; + TR::Register *resultReg; if (op == TR::InstOpCode::LCMPXCHG8BMem) { int numDeps = 4; @@ -9868,15 +9903,32 @@ inlineCompareAndSwapNative( generateMemRegInstruction(op, node, cmpxchgMR, newValueRegister, deps, cg); } + if (isExchange) + { + killOldValueRegister = false; + resultReg = oldValueRegister; + if (isObject) + { + resultReg->setContainsCollectedReference(); + if (TR::Compiler->om.compressedReferenceShiftOffset() != 0) + { + generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, resultReg, TR::Compiler->om.compressedReferenceShiftOffset(), cg); + } + } + } + if (killOldValueRegister) cg->stopUsingRegister(oldValueRegister); if (storeAddressRegForRealTime) scratchRegisterManagerForRealTime->reclaimScratchRegister(storeAddressRegForRealTime); - TR::Register *resultReg = cg->allocateRegister(); - generateRegInstruction(TR::InstOpCode::SETE1Reg, node, resultReg, cg); - generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, resultReg, resultReg, cg); + if (!isExchange) + { + resultReg = cg->allocateRegister(); + generateRegInstruction(TR::InstOpCode::SETE1Reg, node, resultReg, cg); + generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, resultReg, resultReg, cg); + } // Non-realtime: Generate a write barrier for this kind of object. // @@ -9902,8 +9954,17 @@ inlineCompareAndSwapNative( node->setRegister(resultReg); - cg->decReferenceCount(newValueChild); cg->decReferenceCount(objectChild); + if (offsetReg) + { + cg->decReferenceCount(offsetChild); + } + else + { + cg->recursivelyDecReferenceCount(offsetChild); + } + cg->decReferenceCount(oldValueChild); + cg->decReferenceCount(newValueChild); if (bumpedRefCount) cg->decReferenceCount(translatedNode); @@ -9930,6 +9991,8 @@ bool J9::X86::TreeEvaluator::VMinlineCallEvaluator( bool callWasInlined = false; TR::Compilation *comp = cg->comp(); + static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL; + if (methodSymbol) { switch (methodSymbol->getRecognizedMethod()) @@ -10011,26 +10074,54 @@ bool J9::X86::TreeEvaluator::VMinlineCallEvaluator( return false; // Call the native version of NativeThread.current() case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: { - if(node->isSafeForCGToFastPathUnsafeCall()) - return inlineCompareAndSwapNative(node, 4, false, cg); + if (node->isSafeForCGToFastPathUnsafeCall()) + return inlineCompareAndSwapNative(node, 4, false, false, cg); } break; case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z: { - if(node->isSafeForCGToFastPathUnsafeCall()) - return inlineCompareAndSwapNative(node, 8, false, cg); + if (node->isSafeForCGToFastPathUnsafeCall()) + return inlineCompareAndSwapNative(node, 8, false, false, cg); } break; case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z: { static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject"); - if(node->isSafeForCGToFastPathUnsafeCall()) + if (node->isSafeForCGToFastPathUnsafeCall()) + { + if (UseOldCompareAndSwapObject) + return inlineCompareAndSwapNative(node, TR::Compiler->om.sizeofReferenceField(), true, false, cg); + else + { + inlineCompareAndSwapObjectNative(node, cg, false); + return true; + } + } + } + break; + case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt: + { + if (!disableCAEIntrinsic && 
node->isSafeForCGToFastPathUnsafeCall()) + return inlineCompareAndSwapNative(node, 4, false, true, cg); + } + break; + case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong: + { + if (!disableCAEIntrinsic && node->isSafeForCGToFastPathUnsafeCall()) + return inlineCompareAndSwapNative(node, 8, false, true, cg); + } + break; + case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject: + case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference: + { + static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject"); + if (!disableCAEIntrinsic && node->isSafeForCGToFastPathUnsafeCall()) { if (UseOldCompareAndSwapObject) - return inlineCompareAndSwapNative(node, (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 8 : 4, true, cg); + return inlineCompareAndSwapNative(node, TR::Compiler->om.sizeofReferenceField(), true, true, cg); else { - inlineCompareAndSwapObjectNative(node, cg); + inlineCompareAndSwapObjectNative(node, cg, true); return true; } } @@ -10526,11 +10617,17 @@ void J9::X86::TreeEvaluator::VMwrtbarWithoutStoreEvaluator( (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental) && - (node->getOpCodeValue()==TR::icall)) { - TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol(); - if (symbol != NULL && symbol->getRecognizedMethod()) - unsafeCallBarrier = true; - } + node->getOpCode().isCall()) + { + TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol(); + if (symbol && + (symbol->getRecognizedMethod() == TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z || + symbol->getRecognizedMethod() == TR::jdk_internal_misc_Unsafe_compareAndExchangeObject || + symbol->getRecognizedMethod() == TR::jdk_internal_misc_Unsafe_compareAndExchangeReference)) + { + unsafeCallBarrier = true; + } + } bool doCheckConcurrentMarkActive = (gcMode == gc_modron_wrtbar_cardmark
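
A note on the semantics this patch accelerates: the CompareAndSet family returns only a success flag, while the CompareAndExchange family returns the witness value, i.e. the value the atomic operation actually observed in memory. That contract is why genCAS on Power gains the isExchange path that keeps the lwarx result in resultReg (decompressing it when compressed references are in use) instead of materializing 0/1, and why the x86 exchange path returns the CMPXCHG accumulator (EAX) instead of a SETE/MOVZX result. The following minimal, standalone C++ sketch illustrates the same set-vs-exchange distinction using std::atomic as a stand-in for the JIT-emitted sequences; it is an illustration only, not OpenJ9 code.

#include <atomic>
#include <cstdio>

int main()
{
    std::atomic<int> value{5};

    // compareAndSet shape: the caller learns only whether the swap happened.
    int expected = 5;
    bool swapped = value.compare_exchange_strong(expected, 6); // true; value is now 6

    // compareAndExchange shape: the caller also learns the witness value.
    // On failure, compare_exchange_strong writes the value it observed in
    // memory back into 'expected' -- that observed value is the CAE result.
    expected = 99; // stale guess, so this exchange fails
    bool failed = value.compare_exchange_strong(expected, 0);
    // failed == false, expected == 6 (the witness), value is unchanged

    std::printf("%d %d %d\n", swapped, failed, expected); // prints: 1 0 6
    return 0;
}

For a successful exchange the witness equals the expected value, so returning the loaded/accumulator value is correct in both the success and failure cases; only the failing case needs the extra decompression or shift handling that the Power and x86 changes above add for object references.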