From 01ff327e6e17d7c662f687ca2e8b68e3849af7ec Mon Sep 17 00:00:00 2001 From: midronij Date: Wed, 23 Aug 2023 13:52:59 -0400 Subject: [PATCH] Skip unnecessary runtime checks for Unsafe.CAS() For Unsafe.compareAndSwap(), do not generate array check IL or adjusted array access block if object type is known at compile time. Signed-off-by: midronij --- .../compiler/optimizer/InlinerTempForJ9.cpp | 118 ++++++++++++++---- 1 file changed, 91 insertions(+), 27 deletions(-) diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index 3bf32d1b199..cb14b495d7e 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -831,11 +831,13 @@ TR_J9InlinerPolicy::createAnchorNodesForUnsafeGetPut(TR::TreeTop* treeTop, TR::Node* node = treeTop->getNode(); TR::TreeTop *compRefTT = - TR::TreeTop::create(comp(), genCompressedRefs(isUnsafeGet ? node->getFirstChild() : node, - false)); + TR::TreeTop::create(comp(), genCompressedRefs(isUnsafeGet?node->getFirstChild():node, + false)); + TR::TreeTop *prevTT = treeTop->getPrevTreeTop(); - prevTT->join(compRefTT); + if (prevTT != NULL) prevTT->join(compRefTT); compRefTT->join(isUnsafeGet ? treeTop : treeTop->getNextTreeTop()); + } } @@ -1664,22 +1666,66 @@ TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR // Codegens have a fast path for the compare and swaps, but cannot deal with the case where the offset value passed in to a the CAS is low tagged // (A low tagged offset value means the object being passed in is a java/lang/Class object, and we want a static field) + // Regarding which checks/diamonds get generated, there are three possible cases: + // 1.) Only the low tagged check is generated. This will occur either when gencon GC policy is being used, or under + // balanced GC policy with offheap allocation enabled if the object being operated on is known NOT to be an array + // at compile time. 
+ // 2.) No checks are generated. This will occur under balanced GC policy with offheap allocation enabled if the object + // being operated on is known to be an array at compile time (since if the object is an array, it can't also be a + // java/lang/Class object). + // 3.) Both the array and low tagged checks are generated. This will occur under balanced GC policy with offheap allocation + // enabled if the type of the object being operated on is unknown at compile time. + // This method assumes the offset node is of type long, and is the second child of the unsafe call. TR_InlinerDelimiter delimiter(tracer(),"createUnsafeCASCallDiamond"); debugTrace(tracer(),"Transforming unsafe callNode = %p",callNode); + int length; + const char *objTypeSig = callNode->getChild(1)->getSymbolReference() ? callNode->getChild(1)->getSymbolReference()->getTypeSignature(length) : NULL; + + // There are three cases where we cannot be sure of the Object type at compile time: + // 1.) Object type info is unknown/inaccessible + // 2.) Object type is known at compile time to be java/lang/Object + // 3.) 
Object type is known at compile time to be some interface classes (e.g.: java/lang/Cloneable, java/io/Serializable) + bool objTypeUnknown; + + if (objTypeSig == NULL) + { + objTypeUnknown = true; + } + else + { + TR_OpaqueClassBlock *objClass = comp()->fe()->getClassFromSignature(objTypeSig, length, callNode->getChild(1)->getSymbolReference()->getOwningMethod(comp())); + bool isParameter = callNode->getChild(1)->getSymbolReference()->getSymbol()->isParm(); + + objTypeUnknown = objClass == NULL || objClass == comp()->getObjectClassPointer() || TR::Compiler->cls.isInterfaceClass(comp(), objClass) || isParameter; + } + + //only generate array test in case (3) + bool arrayTestNeeded = TR::Compiler->om.isOffHeapAllocationEnabled() && comp()->target().is64Bit() && objTypeUnknown; + + //only generate separate array access block in cases (2) or (3) + bool arrayBlockNeeded = TR::Compiler->om.isOffHeapAllocationEnabled() && comp()->target().is64Bit() && + (arrayTestNeeded || objTypeSig[0] == '['); + createTempsForUnsafeCall(callNodeTreeTop, callNode); TR::Node *offsetNode = callNode->getChild(2); - TR::TreeTop *compareTree = genClassCheckForUnsafeGetPut(offsetNode, /* branchIfLowTagged */ false); + TR::TreeTop *compareTree; + + //do not generate low tagged test in case (2) + if (!arrayTestNeeded && arrayBlockNeeded) + compareTree = NULL; + else + compareTree = genClassCheckForUnsafeGetPut(offsetNode, /* branchIfLowTagged */ false ); TR::TreeTop *isArrayTreeTop = NULL; TR::TreeTop *arrayAccessTreeTop = NULL; TR::TreeTop *nonArrayAccessTreeTop = NULL; #if defined (J9VM_GC_ENABLE_SPARSE_HEAP_ALLOCATION) - if (TR::Compiler->om.isOffHeapAllocationEnabled()) + if (arrayTestNeeded) { //create arrayCHK treetop @@ -1701,8 +1747,10 @@ TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR TR::Node *isArrayNode = TR::Node::createif(TR::ificmpne, andNode, TR::Node::create(TR::iconst, 0), NULL); isArrayTreeTop = TR::TreeTop::create(comp(), isArrayNode, NULL, 
NULL); + } - + if (arrayBlockNeeded) + { //create array access treetop //adjust arguments if object is array and offheap is being used by changing //object base address (second child) to dataAddr @@ -1735,21 +1783,27 @@ TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR if (isArrayTreeTop) nonArrayAccessTreeTop = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree()); } -#endif /* TR_TARGET_64BIT */ +#endif /* J9VM_GC_ENABLE_SPARSE_HEAP_ALLOCATION */ - // genClassCheck generates a ifcmpne offset&mask 1, meaning if it is NOT - // lowtagged (ie offset&mask == 0), the branch will be taken - TR::TreeTop *branchTargetTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree()); - branchTargetTree->getNode()->getFirstChild()->setIsSafeForCGToFastPathUnsafeCall(true); + TR::TreeTop *branchTargetTree; + TR::TreeTop *fallThroughTree; - TR::TreeTop *fallThroughTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree()); + //only generate if and else trees for low tagged check in cases (1) and (3) + if (compareTree != NULL) + { + // genClassCheck generates a ifcmpne offset&mask 1, meaning if it is NOT + // lowtagged (ie offset&mask == 0), the branch will be taken + branchTargetTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree()); + branchTargetTree->getNode()->getFirstChild()->setIsSafeForCGToFastPathUnsafeCall(true); + fallThroughTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree()); - branchTargetTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount()); - fallThroughTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount()); + branchTargetTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount()); + fallThroughTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount()); - debugTrace(tracer(),"branchTargetTree = %p fallThroughTree = 
%p",branchTargetTree->getNode(),fallThroughTree->getNode()); + debugTrace(tracer(),"branchTargetTree = %p fallThroughTree = %p",branchTargetTree->getNode(),fallThroughTree->getNode()); + } // the call itself may be commoned, so we need to create a temp for the callnode itself @@ -1768,33 +1822,43 @@ TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR TR::Block *callBlock = callNodeTreeTop->getEnclosingBlock(); - if (isArrayTreeTop != NULL) //offheap case: array check generated + if (arrayTestNeeded) //in case (3), we generate the array test diamond, followed by the low tagged check test { callBlock->createConditionalBlocksBeforeTree(callNodeTreeTop, isArrayTreeTop, arrayAccessTreeTop, nonArrayAccessTreeTop, comp()->getFlowGraph(), false, false); nonArrayAccessTreeTop->getEnclosingBlock()->createConditionalBlocksBeforeTree(nonArrayAccessTreeTop, compareTree, branchTargetTree, fallThroughTree, comp()->getFlowGraph(), false, false); } - else //default case (gencon): no array check generated + else if (arrayBlockNeeded) //in case (2), no branching is needed: we simply need to replace the original CAS call with the modified array access block + { + callNodeTreeTop->insertAfter(arrayAccessTreeTop); + callNodeTreeTop->getPrevTreeTop()->join(callNodeTreeTop->getNextTreeTop()); + callBlock->split(arrayAccessTreeTop->getNextTreeTop(), comp()->getFlowGraph(), true); + callBlock->split(arrayAccessTreeTop, comp()->getFlowGraph(), true); + } + else if (compareTree != NULL) //in case (1), we only generate the low tagged check diamond callBlock->createConditionalBlocksBeforeTree(callNodeTreeTop, compareTree, branchTargetTree, fallThroughTree, comp()->getFlowGraph(), false, false); - // the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores. + // the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores. 
if (newSymbolReference) { - TR::Node *branchTargetStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, branchTargetTree->getNode()->getFirstChild(), newSymbolReference); - TR::TreeTop *branchTargetStoreTree = TR::TreeTop::create(comp(), branchTargetStoreNode); + if (compareTree != NULL) //cases (1) and (3) only + { + TR::Node *branchTargetStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, branchTargetTree->getNode()->getFirstChild(), newSymbolReference); + TR::TreeTop *branchTargetStoreTree = TR::TreeTop::create(comp(), branchTargetStoreNode); - branchTargetTree->insertAfter(branchTargetStoreTree); + branchTargetTree->insertAfter(branchTargetStoreTree); - debugTrace(tracer(),"Inserted store tree %p for branch target (taken) side of the diamond",branchTargetStoreNode); + debugTrace(tracer(),"Inserted store tree %p for branch target (taken) side of the diamond", branchTargetStoreNode); - TR::Node *fallThroughStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, fallThroughTree->getNode()->getFirstChild(), newSymbolReference); - TR::TreeTop *fallThroughStoreTree = TR::TreeTop::create(comp(), fallThroughStoreNode); + TR::Node *fallThroughStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, fallThroughTree->getNode()->getFirstChild(), newSymbolReference); + TR::TreeTop *fallThroughStoreTree = TR::TreeTop::create(comp(), fallThroughStoreNode); - fallThroughTree->insertAfter(fallThroughStoreTree); + fallThroughTree->insertAfter(fallThroughStoreTree); - debugTrace(tracer(),"Inserted store tree %p for fall-through side of the diamond",fallThroughStoreNode); + debugTrace(tracer(),"Inserted store tree %p for fall-through side of the diamond", fallThroughStoreNode); + } - if (arrayAccessTreeTop != NULL) + if (arrayAccessTreeTop != NULL) //cases (2) and (3) only { TR::Node *arrayAccessStoreNode = 
TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, arrayAccessTreeTop->getNode()->getFirstChild(), newSymbolReference); TR::TreeTop *arrayAccessStoreTree = TR::TreeTop::create(comp(), arrayAccessStoreNode);