Skip to content

Commit

Permalink
Merge pull request #20736 from dchopra001/hasPositivesAcceleration
Browse files Browse the repository at this point in the history
  • Loading branch information
r30shah authored Dec 6, 2024
2 parents 3f7abb5 + 31854af commit 78caf1d
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 19 deletions.
11 changes: 11 additions & 0 deletions runtime/compiler/codegen/J9CodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
*/
void setSupportsInlineStringHashCode() { _j9Flags.set(SupportsInlineStringHashCode); }

/** \brief
* Queries whether inlining of java/lang/StringCoding.countPositives has been enabled on this code generator.
*
* \return
* true if the SupportsInlineStringCodingCountPositives bit is set in _j9Flags; false otherwise.
*/
bool getSupportsInlineStringCodingCountPositives()
   {
   return _j9Flags.testAny(SupportsInlineStringCodingCountPositives);
   }

/** \brief
* Marks this code generator as able to inline java/lang/StringCoding.countPositives by setting the
* SupportsInlineStringCodingCountPositives bit in _j9Flags.
*/
void setSupportsInlineStringCodingCountPositives()
   {
   _j9Flags.set(SupportsInlineStringCodingCountPositives);
   }

/** \brief
* Determines whether the code generator supports inlining of java/lang/StringCoding.hasNegatives
*/
Expand Down Expand Up @@ -688,6 +698,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
SupportsInlineVectorizedMismatch = 0x00001000,
SupportsInlineVectorizedHashCode = 0x00002000,
SupportsInlineStringCodingHasNegatives = 0x00004000,
SupportsInlineStringCodingCountPositives = 0x00008000,
};

flags32_t _j9Flags;
Expand Down
1 change: 1 addition & 0 deletions runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,7 @@
java_lang_StringCoding_StringDecoder_decode,
java_lang_StringCoding_StringEncoder_encode,
java_lang_StringCoding_hasNegatives,
java_lang_StringCoding_countPositives,
java_lang_StringCoding_implEncodeISOArray,
java_lang_StringCoding_implEncodeAsciiArray,
java_lang_StringCoding_encode8859_1,
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,7 @@ void TR_ResolvedJ9Method::construct()
{x(TR::java_lang_StringCoding_decode, "decode", "(Ljava/nio/charset/Charset;[BII)[C")},
{x(TR::java_lang_StringCoding_encode, "encode", "(Ljava/nio/charset/Charset;[CII)[B")},
{x(TR::java_lang_StringCoding_hasNegatives, "hasNegatives", "([BII)Z")},
{x(TR::java_lang_StringCoding_countPositives, "countPositives", "([BII)I")},
{x(TR::java_lang_StringCoding_implEncodeISOArray, "implEncodeISOArray", "([BI[BII)I")},
{x(TR::java_lang_StringCoding_implEncodeAsciiArray, "implEncodeAsciiArray", "([CI[BII)I")},
{x(TR::java_lang_StringCoding_encode8859_1, "encode8859_1", "(B[B)[B")},
Expand Down Expand Up @@ -5133,6 +5134,7 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm)
case TR::java_lang_String_hashCodeImplDecompressed:
case TR::java_lang_StringLatin1_inflate:
case TR::java_lang_StringCoding_hasNegatives:
case TR::java_lang_StringCoding_countPositives:
case TR::sun_nio_ch_NativeThread_current:
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcDecrypt:
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcEncrypt:
Expand Down
6 changes: 6 additions & 0 deletions runtime/compiler/optimizer/InlinerTempForJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5588,6 +5588,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
return true;
}
break;
case TR::java_lang_StringCoding_countPositives:
if (comp->cg()->getSupportsInlineStringCodingCountPositives())
{
return true;
}
break;
case TR::java_lang_Integer_stringSize:
case TR::java_lang_Long_stringSize:
if (comp->cg()->getSupportsIntegerStringSize())
Expand Down
15 changes: 14 additions & 1 deletion runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ J9::Z::CodeGenerator::initialize()
{
cg->setSupportsInlineStringCodingHasNegatives();
}
static bool disableInlineStringCodingCountPositives = feGetEnv("TR_DisableInlineStringCodingCountPositives") != NULL;
if (cg->getSupportsVectorRegisters() && !disableInlineStringCodingCountPositives &&
!TR::Compiler->om.canGenerateArraylets())
{
cg->setSupportsInlineStringCodingCountPositives();
}

// Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as
// TRTO allocate lookup tables in persistent memory that cannot be relocated.
Expand Down Expand Up @@ -4028,7 +4034,14 @@ J9::Z::CodeGenerator::inlineDirectCall(
case TR::java_lang_StringCoding_hasNegatives:
if (cg->getSupportsInlineStringCodingHasNegatives())
{
resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg);
resultReg = TR::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(node, cg, false);
return true;
}
break;
case TR::java_lang_StringCoding_countPositives:
if (cg->getSupportsInlineStringCodingCountPositives())
{
resultReg = TR::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(node, cg, true);
return true;
}
break;
Expand Down
81 changes: 64 additions & 17 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,9 +859,9 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator
}

/*
* This method inlines the Java API StringCoding.hasNegatives(byte src, int off, int len) using
* This method inlines the Java APIs StringCoding.hasNegatives(byte[] src, int off, int len) and StringCoding.countPositives(byte[] src, int off, int len) using
* SIMD instructions.
* The method looks like below on Java 17:
* StringCoding.hasNegatives is available on Java 11, 17 and 21. It looks like below on all platforms:
*
* @IntrinsicCandidate
* public static boolean hasNegatives(byte[] ba, int off, int len) {
Expand All @@ -872,10 +872,22 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator
* }
* return false;
* }
* This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too.
*
* StringCoding.countPositives looks like below and is available on Java 21 and newer platforms:
*
* @IntrinsicCandidate
* public static int countPositives(byte[] ba, int off, int len) {
* int limit = off + len;
* for (int i = off; i < limit; i++) {
* if (ba[i] < 0) {
* return i - off;
* }
* }
* return len;
* }
*/
TR::Register*
J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg)
J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *node, TR::CodeGenerator *cg, bool isCountPositives)
{
TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0));
TR::Register *offsetReg = cg->evaluate(node->getChild(1));
Expand All @@ -886,17 +898,27 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg);
TR::LabelSymbol *processCountPositivesOutOfRangeChar = isCountPositives ? generateLabelSymbol(cg) : NULL;

TR::Register *vInput = cg->allocateRegister(TR_VRF);
TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF);
TR::Register *vComparison = cg->allocateRegister(TR_VRF);
TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // off + len
TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF);
TR::Register *outOfRangeCharIndexGR = isCountPositives ? cg->allocateRegister() : NULL;

TR::Register *returnReg = cg->allocateRegister();
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
if (isCountPositives)
{
generateRRInstruction(cg, TR::InstOpCode::LR, node, returnReg, lengthReg);
}
else
{
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
}

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
cFlowRegionStart->setStartInternalControlFlow();
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg);
generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg);
Expand All @@ -910,7 +932,6 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
processMultiple16CharsStart->setStartInternalControlFlow();

// Load bytes and search for out of range character
generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
Expand Down Expand Up @@ -938,21 +959,43 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));

generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, isCountPositives ? processCountPositivesOutOfRangeChar : processOutOfRangeChar);

generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar);
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1);
if (isCountPositives)
{
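// The residue path reuses numCharsLeftToProcess as the length operand of the VLL above, so at this point it no
// longer holds the plain count of unprocessed bytes. Add 1 back to undo that adjustment (NOTE(review): this
// assumes the residue code decremented it by exactly 1 for VLL - confirm) so that the code at
// processOutOfRangeChar computes the correct return value for StringCoding.countPositives.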
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processCountPositivesOutOfRangeChar);
generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, 1);
}

TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg);
dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny);
dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny);
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar);
if (isCountPositives)
{
generateRRRInstruction(cg, TR::InstOpCode::SRK, node, returnReg, lengthReg, numCharsLeftToProcess);
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, outOfRangeCharIndexGR, outOfRangeCharIndex, generateS390MemoryReference(7, cg), 0);
generateRRInstruction(cg, TR::InstOpCode::AR, node, returnReg, outOfRangeCharIndexGR);
}
else
{
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1);
}
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, isCountPositives ? 10 : 9, cg);
if (isCountPositives)
{
dependencies->addPostCondition(outOfRangeCharIndexGR, TR::RealRegister::AssignAny);
}
dependencies->addPostCondition(lengthReg, TR::RealRegister::AssignAny);
dependencies->addPostCondition(vInput, TR::RealRegister::AssignAny);
dependencies->addPostCondition(outOfRangeCharIndex, TR::RealRegister::AssignAny);
dependencies->addPostCondition(vUpperLimit, TR::RealRegister::AssignAny);
dependencies->addPostCondition(vComparison, TR::RealRegister::AssignAny);
dependencies->addPostCondition(inputPtrReg, TR::RealRegister::AssignAny);
dependencies->addPostCondition(numCharsLeftToProcess, TR::RealRegister::AssignAny);
dependencies->addPostCondition(returnReg, TR::RealRegister::AssignAny);
dependencies->addPostCondition(offsetReg, TR::RealRegister::AssignAny);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
cFlowRegionEnd->setEndInternalControlFlow();
Expand All @@ -967,6 +1010,10 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen
cg->stopUsingRegister(vUpperLimit);
cg->stopUsingRegister(vComparison);
cg->stopUsingRegister(numCharsLeftToProcess);
if (isCountPositives)
{
cg->stopUsingRegister(outOfRangeCharIndexGR);
}
node->setRegister(returnReg);
return returnReg;
}
Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/z/codegen/J9TreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
* Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V
*/
static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineStringCodingHasNegativesOrCountPositives(TR::Node *node, TR::CodeGenerator *cg, bool isCountPositives);
static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false);
static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false);
static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);
Expand Down

0 comments on commit 78caf1d

Please sign in to comment.