Skip to content

Commit

Permalink
Merge pull request #20574 from dchopra001/hasNegativesAcceleration
Browse files Browse the repository at this point in the history
Accelerate StringCoding.hasNegatives on Z
  • Loading branch information
r30shah authored Nov 25, 2024
2 parents d96ca69 + 10901c4 commit 3aff572
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 0 deletions.
11 changes: 11 additions & 0 deletions runtime/compiler/codegen/J9CodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
*/
void setSupportsInlineStringHashCode() { _j9Flags.set(SupportsInlineStringHashCode); }

/** \brief
 *    Queries whether this code generator has been marked as able to inline
 *    java/lang/StringCoding.hasNegatives.
 *
 *  \return
 *    true if the SupportsInlineStringCodingHasNegatives flag is set; false otherwise.
 */
bool getSupportsInlineStringCodingHasNegatives()
   {
   return _j9Flags.testAny(SupportsInlineStringCodingHasNegatives);
   }

/** \brief
 *    Marks this code generator as able to inline java/lang/StringCoding.hasNegatives
 *    by setting the SupportsInlineStringCodingHasNegatives flag.
 */
void setSupportsInlineStringCodingHasNegatives()
   {
   _j9Flags.set(SupportsInlineStringCodingHasNegatives);
   }

/** \brief
* Determines whether the code generator supports inlining of java/lang/StringLatin1.inflate
*/
Expand Down Expand Up @@ -677,6 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
SavesNonVolatileGPRsForGC = 0x00000800,
SupportsInlineVectorizedMismatch = 0x00001000,
SupportsInlineVectorizedHashCode = 0x00002000,
SupportsInlineStringCodingHasNegatives = 0x00004000,
};

flags32_t _j9Flags;
Expand Down
1 change: 1 addition & 0 deletions runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,7 @@
java_lang_StringCoding_encode,
java_lang_StringCoding_StringDecoder_decode,
java_lang_StringCoding_StringEncoder_encode,
java_lang_StringCoding_hasNegatives,
java_lang_StringCoding_implEncodeISOArray,
java_lang_StringCoding_implEncodeAsciiArray,
java_lang_StringCoding_encode8859_1,
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,7 @@ void TR_ResolvedJ9Method::construct()
{
{x(TR::java_lang_StringCoding_decode, "decode", "(Ljava/nio/charset/Charset;[BII)[C")},
{x(TR::java_lang_StringCoding_encode, "encode", "(Ljava/nio/charset/Charset;[CII)[B")},
{x(TR::java_lang_StringCoding_hasNegatives, "hasNegatives", "([BII)Z")},
{x(TR::java_lang_StringCoding_implEncodeISOArray, "implEncodeISOArray", "([BI[BII)I")},
{x(TR::java_lang_StringCoding_implEncodeAsciiArray, "implEncodeAsciiArray", "([CI[BII)I")},
{x(TR::java_lang_StringCoding_encode8859_1, "encode8859_1", "(B[B)[B")},
Expand Down Expand Up @@ -5135,6 +5136,7 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm)
case TR::java_lang_String_hashCodeImplCompressed:
case TR::java_lang_String_hashCodeImplDecompressed:
case TR::java_lang_StringLatin1_inflate:
case TR::java_lang_StringCoding_hasNegatives:
case TR::sun_nio_ch_NativeThread_current:
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcDecrypt:
case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcEncrypt:
Expand Down
6 changes: 6 additions & 0 deletions runtime/compiler/optimizer/InlinerTempForJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5582,6 +5582,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
return true;
}
break;
case TR::java_lang_StringCoding_hasNegatives:
if (comp->cg()->getSupportsInlineStringCodingHasNegatives())
{
return true;
}
break;
case TR::java_lang_Integer_stringSize:
case TR::java_lang_Long_stringSize:
if (comp->cg()->getSupportsIntegerStringSize())
Expand Down
14 changes: 14 additions & 0 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,13 @@ J9::Z::CodeGenerator::initialize()
cg->setSupportsInlineConcurrentLinkedQueue();
}

static bool disableInlineStringCodingHasNegatives = feGetEnv("TR_DisableInlineStringCodingHasNegatives") != NULL;
if (cg->getSupportsVectorRegisters() && !disableInlineStringCodingHasNegatives &&
!TR::Compiler->om.canGenerateArraylets())
{
cg->setSupportsInlineStringCodingHasNegatives();
}

// Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as
// TRTO allocate lookup tables in persistent memory that cannot be relocated.
if (comp->isOutOfProcessCompilation())
Expand Down Expand Up @@ -4013,6 +4020,13 @@ J9::Z::CodeGenerator::inlineDirectCall(
return resultReg != NULL;
}
break;
case TR::java_lang_StringCoding_hasNegatives:
if (cg->getSupportsInlineStringCodingHasNegatives())
{
resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg);
return true;
}
break;
case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:
return resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg)
: TR::TreeEvaluator::inlineUTF16BEEncode (node, cg);
Expand Down
113 changes: 113 additions & 0 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,119 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator
return TR::TreeEvaluator::pdclearEvaluator(node, cg);
}

/*
 * This method inlines the Java API StringCoding.hasNegatives(byte[] ba, int off, int len) using
 * SIMD instructions.
 * The method looks like below on Java 17:
 *
 *    @IntrinsicCandidate
 *    public static boolean hasNegatives(byte[] ba, int off, int len) {
 *        for (int i = off; i < off + len; i++) {
 *            if (ba[i] < 0) {
 *                return true;
 *            }
 *        }
 *        return false;
 *    }
 *
 * This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too.
 *
 * Children of the call node:
 *    child 0 - the byte array (ba)
 *    child 1 - the starting offset (off)
 *    child 2 - the number of bytes to examine (len)
 *
 * Generated code outline:
 *    1. result = 0; if len <= 0, exit.
 *    2. While more than 16 bytes remain, load 16 bytes and use VSTRC to look for any byte that is
 *       greater than 127 when treated as unsigned (i.e. negative as a Java byte).
 *    3. Load the remaining 1..16 bytes with VLL and run the same VSTRC check.
 *    4. If any check found a match, result = 1.
 *
 * Returns the register holding the boolean result (0 or 1); the register is also set on the node.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg)
   {
   // inputPtrReg is modified below (offset added, then advanced by 16 per iteration), hence the clobber evaluate.
   TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0));
   TR::Register *offsetReg = cg->evaluate(node->getChild(1));
   TR::Register *lengthReg = cg->evaluate(node->getChild(2));

   TR::LabelSymbol *processMultiple16CharsStart = generateLabelSymbol(cg);
   TR::LabelSymbol *processMultiple16CharsEnd = generateLabelSymbol(cg);
   TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
   TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
   TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg);

   TR::Register *vInput = cg->allocateRegister(TR_VRF);
   TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF);
   TR::Register *vComparison = cg->allocateRegister(TR_VRF);
   TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // bytes still to examine; initialized from len
   TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF);

   // Default result is false; it is only overwritten when an out of range byte is found.
   TR::Register *returnReg = cg->allocateRegister();
   generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);

   // Every branch emitted below targets a label between cFlowRegionStart and cFlowRegionEnd, so the internal
   // control flow region must begin here, before the first of those branches.
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
   cFlowRegionStart->setStartInternalControlFlow();

   // Nothing to scan when len <= 0. This matches the Java loop, which never executes in that case, and keeps a
   // negative count from reaching the VLL below.
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BNH, cFlowRegionEnd, false, false);
   generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg);
   generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg);

   const uint8_t upperLimit = 127;
   const uint8_t rangeComparison = 0x20; // > comparison

   generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vUpperLimit, upperLimit, 0);
   generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vComparison, rangeComparison, 0);

   // With 16 or fewer bytes, skip the full-vector loop and go straight to the residue handling.
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);

   // Load bytes and search for out of range character
   generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));

   generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);

   // process bad character by setting return register to true and exiting
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);

   // Update the counters
   generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, 16, cg));
   generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -16);

   // Branch back up if we still have more than 16 characters to process.
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsEnd);

   // Zero out the input register to avoid invalid VSTRC result
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput, 0, 0 /*unused*/);

   // VLL works on the index of the last byte to load so we subtract 1
   generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1);
   // Load residue bytes and check for out of range character
   generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));

   generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar);

   // No out of range byte found: leave the result as false and skip over the "found" path.
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar);
   generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1);

   // Every virtual register referenced inside the internal control flow region must appear in the dependencies
   // attached to the region's end label. That includes offsetReg and lengthReg, which are read after the first branch.
   TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg);
   dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(offsetReg, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(lengthReg, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny);
   dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
   cFlowRegionEnd->setEndInternalControlFlow();

   for (int i = 0; i < node->getNumChildren(); i++)
      {
      cg->decReferenceCount(node->getChild(i));
      }

   cg->stopUsingRegister(vInput);
   cg->stopUsingRegister(outOfRangeCharIndex);
   cg->stopUsingRegister(vUpperLimit);
   cg->stopUsingRegister(vComparison);
   cg->stopUsingRegister(numCharsLeftToProcess);
   node->setRegister(returnReg);
   return returnReg;
   }

/* Moved from Codegen to FE */
///////////////////////////////////////////////////////////////////////////////////
// Generate code to perform a comparison and branch to a snippet.
Expand Down
1 change: 1 addition & 0 deletions runtime/compiler/z/codegen/J9TreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
* Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V
*/
static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg);
/*
 * Inline Java's StringCoding.hasNegatives([BII)Z
 */
static TR::Register *inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false);
static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false);
static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);
Expand Down

0 comments on commit 3aff572

Please sign in to comment.