diff --git a/runtime/compiler/codegen/J9CodeGenerator.hpp b/runtime/compiler/codegen/J9CodeGenerator.hpp index 5bb0a356203..0a514012216 100644 --- a/runtime/compiler/codegen/J9CodeGenerator.hpp +++ b/runtime/compiler/codegen/J9CodeGenerator.hpp @@ -461,6 +461,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz); */ void setSupportsInlineStringHashCode() { _j9Flags.set(SupportsInlineStringHashCode); } + /** \brief + * Determines whether the code generator supports inlining of java/lang/StringCoding.countPositives + */ + bool getSupportsInlineStringCodingCountPositives() { return _j9Flags.testAny(SupportsInlineStringCodingCountPositives); } + + /** \brief + * The code generator supports inlining of java/lang/StringCoding.countPositives + */ + void setSupportsInlineStringCodingCountPositives() { _j9Flags.set(SupportsInlineStringCodingCountPositives); } + /** \brief * Determines whether the code generator supports inlining of java/lang/StringCoding.hasNegatives */ @@ -688,6 +698,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz); SupportsInlineVectorizedMismatch = 0x00001000, SupportsInlineVectorizedHashCode = 0x00002000, SupportsInlineStringCodingHasNegatives = 0x00004000, + SupportsInlineStringCodingCountPositives = 0x00008000, }; flags32_t _j9Flags; diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 69401efa41f..49ad355acb8 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -1222,6 +1222,7 @@ java_lang_StringCoding_StringDecoder_decode, java_lang_StringCoding_StringEncoder_encode, java_lang_StringCoding_hasNegatives, + java_lang_StringCoding_countPositives, java_lang_StringCoding_implEncodeISOArray, java_lang_StringCoding_implEncodeAsciiArray, java_lang_StringCoding_encode8859_1, diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index 2ff733abaca..e9a247dc3db 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -2833,6 +2833,7 @@ void TR_ResolvedJ9Method::construct() {x(TR::java_lang_StringCoding_decode, "decode", "(Ljava/nio/charset/Charset;[BII)[C")}, {x(TR::java_lang_StringCoding_encode, "encode", "(Ljava/nio/charset/Charset;[CII)[B")}, {x(TR::java_lang_StringCoding_hasNegatives, "hasNegatives", "([BII)Z")}, + {x(TR::java_lang_StringCoding_countPositives, "countPositives", "([BII)I")}, {x(TR::java_lang_StringCoding_implEncodeISOArray, "implEncodeISOArray", "([BI[BII)I")}, {x(TR::java_lang_StringCoding_implEncodeAsciiArray, "implEncodeAsciiArray", "([CI[BII)I")}, {x(TR::java_lang_StringCoding_encode8859_1, "encode8859_1", "(B[B)[B")}, @@ -5133,6 +5134,7 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm) case TR::java_lang_String_hashCodeImplDecompressed: case TR::java_lang_StringLatin1_inflate: case TR::java_lang_StringCoding_hasNegatives: + case TR::java_lang_StringCoding_countPositives: case TR::sun_nio_ch_NativeThread_current: case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcDecrypt: case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcEncrypt: diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index 0bdc3fd0097..e16b260cf9a 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -5588,6 +5588,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite return true; } break; + case TR::java_lang_StringCoding_countPositives: + if (comp->cg()->getSupportsInlineStringCodingCountPositives()) + { + return true; + } + break; case TR::java_lang_Integer_stringSize: case TR::java_lang_Long_stringSize: if (comp->cg()->getSupportsIntegerStringSize()) diff --git a/runtime/compiler/z/codegen/J9CodeGenerator.cpp b/runtime/compiler/z/codegen/J9CodeGenerator.cpp index 75aea131fdc..8fb71450f0e 100644 --- a/runtime/compiler/z/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/z/codegen/J9CodeGenerator.cpp @@ -126,6 +126,12 @@ J9::Z::CodeGenerator::initialize() { cg->setSupportsInlineStringCodingHasNegatives(); } + static bool disableInlineStringCodingCountPositives = feGetEnv("TR_DisableInlineStringCodingCountPositives") != NULL; + if (cg->getSupportsVectorRegisters() && !disableInlineStringCodingCountPositives && + !TR::Compiler->om.canGenerateArraylets()) + { + cg->setSupportsInlineStringCodingCountPositives(); + } // Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as // TRTO allocate lookup tables in persistent memory that cannot be relocated. @@ -4028,7 +4034,14 @@ J9::Z::CodeGenerator::inlineDirectCall( case TR::java_lang_StringCoding_hasNegatives: if (cg->getSupportsInlineStringCodingHasNegatives()) { - resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg); + resultReg = TR::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(node, cg, false); + return true; + } + break; + case TR::java_lang_StringCoding_countPositives: + if (cg->getSupportsInlineStringCodingCountPositives()) + { + resultReg = TR::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(node, cg, true); return true; } break; diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp index 207c72c4aac..faabbcca960 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp @@ -859,9 +859,9 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator } /* - * This method inlines the Java API StringCoding.hasNegatives(byte src, int off, int len) using + * This method inlines the Java APIs StringCoding.hasNegatives(byte[] src, int off, int len) and StringCoding.countPositives(byte[] src, int off, int len) using * SIMD instructions. - * The method looks like below on Java 17: + * StringCoding.hasNegatives is available on Java 11, 17 and 21. It looks like below on all platforms: * * @IntrinsicCandidate * public static boolean hasNegatives(byte[] ba, int off, int len) { @@ -872,10 +872,22 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator * } * return false; * } - * This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too. + * + * StringCoding.countPositives looks like below and is available on Java 21 and newer platforms: + * + * @IntrinsicCandidate + * public static int countPositives(byte[] ba, int off, int len) { + * int limit = off + len; + * for (int i = off; i < limit; i++) { + * if (ba[i] < 0) { + * return i - off; + * } + * } + * return len; + * } */ TR::Register* -J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg) +J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *node, TR::CodeGenerator *cg, bool isCountPositives) { TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0)); TR::Register *offsetReg = cg->evaluate(node->getChild(1)); @@ -886,17 +898,27 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg); TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg); TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg); + TR::LabelSymbol *processCountPositivesOutOfRangeChar = isCountPositives ? generateLabelSymbol(cg) : NULL; TR::Register *vInput = cg->allocateRegister(TR_VRF); TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF); TR::Register *vComparison = cg->allocateRegister(TR_VRF); TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // off + len TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF); + TR::Register *outOfRangeCharIndexGR = isCountPositives ? cg->allocateRegister() : NULL; TR::Register *returnReg = cg->allocateRegister(); - generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0); + if (isCountPositives) + { + generateRRInstruction(cg, TR::InstOpCode::LR, node, returnReg, lengthReg); + } + else + { + generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0); + } generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart); + cFlowRegionStart->setStartInternalControlFlow(); generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false); generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg); generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg); @@ -910,7 +932,6 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false); generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart); - processMultiple16CharsStart->setStartInternalControlFlow(); // Load bytes and search for out of range character generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); @@ -938,21 +959,43 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0); - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar); + generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, isCountPositives ? processCountPositivesOutOfRangeChar : processOutOfRangeChar); generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd); - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar); - generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1); + if (isCountPositives) + { + // numCharsLeftToProcess is reused to load residue bytes in residue handling code. We must reverse this to ensure + // we are calculating the return value of StringCoding.countPositives correctly. + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processCountPositivesOutOfRangeChar); + generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, 1); + } - TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg); - dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny); - dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny); + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar); + if (isCountPositives) + { + generateRRRInstruction(cg, TR::InstOpCode::SRK, node, returnReg, lengthReg, numCharsLeftToProcess); + generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, outOfRangeCharIndexGR, outOfRangeCharIndex, generateS390MemoryReference(7, cg), 0); + generateRRInstruction(cg, TR::InstOpCode::AR, node, returnReg, outOfRangeCharIndexGR); + } + else + { + generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1); + } + TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, isCountPositives ? 10 : 9, cg); + if (isCountPositives) + { + dependencies->addPostCondition(outOfRangeCharIndexGR, TR::RealRegister::AssignAny); + } + dependencies->addPostCondition(lengthReg, TR::RealRegister::AssignAny); + dependencies->addPostCondition(vInput, TR::RealRegister::AssignAny); + dependencies->addPostCondition(outOfRangeCharIndex, TR::RealRegister::AssignAny); + dependencies->addPostCondition(vUpperLimit, TR::RealRegister::AssignAny); + dependencies->addPostCondition(vComparison, TR::RealRegister::AssignAny); + dependencies->addPostCondition(inputPtrReg, TR::RealRegister::AssignAny); + dependencies->addPostCondition(numCharsLeftToProcess, TR::RealRegister::AssignAny); + dependencies->addPostCondition(returnReg, TR::RealRegister::AssignAny); + dependencies->addPostCondition(offsetReg, TR::RealRegister::AssignAny); generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies); cFlowRegionEnd->setEndInternalControlFlow(); @@ -967,6 +1010,10 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGen cg->stopUsingRegister(vUpperLimit); cg->stopUsingRegister(vComparison); cg->stopUsingRegister(numCharsLeftToProcess); + if (isCountPositives) + { + cg->stopUsingRegister(outOfRangeCharIndexGR); + } node->setRegister(returnReg); return returnReg; } diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp index e9b6d522a39..4adf0414fe5 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp @@ -73,7 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator * Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V */ static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *inlineStringCodingHasNegativesOrCountPositives(TR::Node *node, TR::CodeGenerator *cg, bool isCountPositives); static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false); static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false); static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);