-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[JIT] Add legacy extended EVEX encoding and EVEX.ND/NF feature to x64 emitter backend #108796
base: main
Are you sure you want to change the base?
Changes from all commits
1820567
d1afc68
2335aa3
6578c58
01eeb80
690aee3
31d7fb4
a995878
74aacf6
c330927
fbf20d1
ea02e70
c74b801
34980b4
2ffdbeb
3a729bb
d943b03
c8fee9c
6ec0e97
1d01003
1acc219
87ad443
bb9905a
86083b2
dfe8760
64761cd
f1aba62
f5cc5a8
7ca8433
bc4d225
deb3814
0d63230
13b8076
42c6cfc
b1a9617
ec5d5ca
ebeaf04
547f01d
3566464
f8e9c4d
6bfd050
4b0085d
5701b1c
6d30388
5d3768c
a5619e4
c71ace6
ca92da9
5d10aef
5f288a6
f4e96b0
637c413
a203a4d
a99705a
b5fa5bf
b69d01e
52539c3
25d66bf
a19da9e
2e8d714
df59342
226fabb
36c6631
5f8a01d
0453630
07868bc
69f7e8b
1c1a894
1be4b12
bfb06c7
9541a99
543d949
a879019
55cbda6
a9a3d5c
0eef560
0480c02
48cec5f
7171e0e
5f7606c
6e33640
02786a1
924ba0e
7bc388b
4bf45ae
fd73268
521f978
b48e3d1
5340893
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -402,12 +402,13 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, | |
else | ||
{ | ||
// For section constant, the immediate will be relocatable | ||
GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags)); | ||
GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm, | ||
INS_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags)); | ||
} | ||
} | ||
else | ||
{ | ||
GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags)); | ||
GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm, INS_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags)); | ||
} | ||
} | ||
regSet.verifyRegUsed(reg); | ||
|
@@ -738,12 +739,18 @@ void CodeGen::genCodeForNegNot(GenTree* tree) | |
{ | ||
GenTree* operand = tree->gtGetOp1(); | ||
assert(operand->isUsedFromReg()); | ||
regNumber operandReg = genConsumeReg(operand); | ||
regNumber operandReg = genConsumeReg(operand); | ||
instruction ins = genGetInsForOper(tree->OperGet(), targetType); | ||
|
||
inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true); | ||
|
||
instruction ins = genGetInsForOper(tree->OperGet(), targetType); | ||
inst_RV(ins, targetReg, targetType); | ||
if (GetEmitter()->DoJitUseApxNDD(ins) && (targetReg != operandReg)) | ||
{ | ||
GetEmitter()->emitIns_R_R(ins, emitTypeSize(operand), targetReg, operandReg, INS_OPTS_EVEX_nd); | ||
} | ||
else | ||
{ | ||
inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true); | ||
inst_RV(ins, targetReg, targetType); | ||
} | ||
} | ||
|
||
genProduceReg(tree); | ||
|
@@ -1158,12 +1165,49 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) | |
// reg3 = reg3 op reg2 | ||
else | ||
{ | ||
var_types op1Type = op1->TypeGet(); | ||
inst_Mov(op1Type, targetReg, op1reg, /* canSkip */ false); | ||
regSet.verifyRegUsed(targetReg); | ||
gcInfo.gcMarkRegPtrVal(targetReg, op1Type); | ||
dst = treeNode; | ||
src = op2; | ||
if (emit->DoJitUseApxNDD(ins) && !varTypeIsFloating(treeNode)) | ||
{ | ||
// TODO-xarch-apx: | ||
// APX can provide optimal code gen in this case using NDD feature: | ||
// reg3 = op1 op op2 without extra mov | ||
|
||
// see if it can be optimized by inc/dec | ||
if (oper == GT_ADD && op2->isContainedIntOrIImmed() && !treeNode->gtOverflowEx()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The handling here of […] It's much better as a peephole in emit than something codegen must directly consider, IMO. |
||
{ | ||
if (op2->IsIntegralConst(1)) | ||
{ | ||
emit->emitIns_R_R(INS_inc, emitTypeSize(treeNode), targetReg, op1reg, INS_OPTS_EVEX_nd); | ||
genProduceReg(treeNode); | ||
return; | ||
} | ||
else if (op2->IsIntegralConst(-1)) | ||
{ | ||
emit->emitIns_R_R(INS_dec, emitTypeSize(treeNode), targetReg, op1reg, INS_OPTS_EVEX_nd); | ||
genProduceReg(treeNode); | ||
return; | ||
} | ||
} | ||
|
||
assert(op1reg != targetReg); | ||
assert(op2reg != targetReg); | ||
emit->emitInsBinary(ins, emitTypeSize(treeNode), op1, op2, targetReg); | ||
if (treeNode->gtOverflowEx()) | ||
{ | ||
assert(oper == GT_ADD || oper == GT_SUB); | ||
genCheckOverflow(treeNode); | ||
} | ||
genProduceReg(treeNode); | ||
return; | ||
} | ||
else | ||
{ | ||
var_types op1Type = op1->TypeGet(); | ||
inst_Mov(op1Type, targetReg, op1reg, /* canSkip */ false); | ||
regSet.verifyRegUsed(targetReg); | ||
gcInfo.gcMarkRegPtrVal(targetReg, op1Type); | ||
dst = treeNode; | ||
src = op2; | ||
} | ||
} | ||
|
||
// try to use an inc or dec | ||
|
@@ -1182,6 +1226,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) | |
return; | ||
} | ||
} | ||
|
||
regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src); | ||
noway_assert(r == targetReg); | ||
|
||
|
@@ -1295,6 +1340,24 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode) | |
} | ||
assert(regOp->isUsedFromReg()); | ||
|
||
if (emit->DoJitUseApxNDD(ins) && regOp->GetRegNum() != mulTargetReg) | ||
{ | ||
// use NDD form to optimize this form: | ||
// mov targetReg, regOp | ||
// imul targetReg, rmOp | ||
// to imul targetReg, regOp rmOp. | ||
emit->emitInsBinary(ins, size, regOp, rmOp, mulTargetReg); | ||
if (requiresOverflowCheck) | ||
{ | ||
// Overflow checking is only used for non-floating point types | ||
noway_assert(!varTypeIsFloating(treeNode)); | ||
|
||
genCheckOverflow(treeNode); | ||
} | ||
genProduceReg(treeNode); | ||
return; | ||
} | ||
|
||
// Setup targetReg when neither of the source operands was a matching register | ||
inst_Mov(targetType, mulTargetReg, regOp->GetRegNum(), /* canSkip */ true); | ||
|
||
|
@@ -4406,23 +4469,23 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node) | |
if (imm == 1) | ||
{ | ||
// inc [addr] | ||
GetEmitter()->emitIns_AR(INS_inc, size, addr->GetRegNum(), 0); | ||
GetEmitter()->emitIns_AR(INS_inc_no_evex, size, addr->GetRegNum(), 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: We should probably keep the existing name since it's the baseline instruction. We should rather give the APX-specific variant a new name, like […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The changes here were made due to the fact that the […] I definitely agree with the idea that we should make the new naming variants point to the instructions with new features and only use them when new features are needed, like EGPRs, NDD, and NF. But I will probably need to preserve the REX2 functionality in the original […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
It's definitely fine for an instruction like […] The main consideration is simply that we don't want the "good name" like […] If there must be two different entries for the same instruction because the opcodes conflict, then names like […] |
||
} | ||
else if (imm == -1) | ||
{ | ||
// dec [addr] | ||
GetEmitter()->emitIns_AR(INS_dec, size, addr->GetRegNum(), 0); | ||
GetEmitter()->emitIns_AR(INS_dec_no_evex, size, addr->GetRegNum(), 0); | ||
} | ||
else | ||
{ | ||
// add [addr], imm | ||
GetEmitter()->emitIns_I_AR(INS_add, size, imm, addr->GetRegNum(), 0); | ||
GetEmitter()->emitIns_I_AR(INS_add_no_evex, size, imm, addr->GetRegNum(), 0); | ||
} | ||
} | ||
else | ||
{ | ||
// add [addr], data | ||
GetEmitter()->emitIns_AR_R(INS_add, size, data->GetRegNum(), addr->GetRegNum(), 0); | ||
GetEmitter()->emitIns_AR_R(INS_add_no_evex, size, data->GetRegNum(), addr->GetRegNum(), 0); | ||
} | ||
} | ||
|
||
|
@@ -4449,7 +4512,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* node) | |
|
||
if (node->OperIs(GT_XORR, GT_XAND)) | ||
{ | ||
const instruction ins = node->OperIs(GT_XORR) ? INS_or : INS_and; | ||
const instruction ins = node->OperIs(GT_XORR) ? INS_or_no_evex : INS_and_no_evex; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment on |
||
|
||
if (node->IsUnusedValue()) | ||
{ | ||
|
@@ -4841,6 +4904,24 @@ void CodeGen::genCodeForShift(GenTree* tree) | |
genProduceReg(tree); | ||
return; | ||
} | ||
|
||
if (GetEmitter()->DoJitUseApxNDD(ins) && (tree->GetRegNum() != operandReg)) | ||
{ | ||
ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue); | ||
// If APX is available, we can use NDD to optimize the case when LSRA failed to avoid explicit mov. | ||
// this case might be rarely hit. | ||
if (shiftByValue == 1) | ||
{ | ||
GetEmitter()->emitIns_R_R(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, INS_OPTS_EVEX_nd); | ||
} | ||
else | ||
{ | ||
GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, shiftByValue, | ||
INS_OPTS_EVEX_nd); | ||
} | ||
genProduceReg(tree); | ||
return; | ||
} | ||
#endif | ||
// First, move the operand to the destination register and | ||
// later on perform the shift in-place. | ||
|
@@ -4887,6 +4968,15 @@ void CodeGen::genCodeForShift(GenTree* tree) | |
// The operand to be shifted must not be in ECX | ||
noway_assert(operandReg != REG_RCX); | ||
|
||
if (GetEmitter()->DoJitUseApxNDD(ins) && (tree->GetRegNum() != operandReg)) | ||
{ | ||
// If APX is available, we can use NDD to optimize the case when LSRA failed to avoid explicit mov. | ||
// this case might be rarely hit. | ||
GetEmitter()->emitIns_R_R(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, INS_OPTS_EVEX_nd); | ||
genProduceReg(tree); | ||
return; | ||
} | ||
|
||
inst_Mov(targetType, tree->GetRegNum(), operandReg, /* canSkip */ true); | ||
inst_RV(ins, tree->GetRegNum(), targetType); | ||
} | ||
|
@@ -9237,6 +9327,91 @@ void CodeGen::genAmd64EmitterUnitTestsApx() | |
|
||
theEmitter->emitIns_S(INS_neg, EA_2BYTE, 0, 0); | ||
theEmitter->emitIns_S(INS_not, EA_2BYTE, 0, 0); | ||
|
||
// APX-EVEX | ||
|
||
theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_R(INS_sub, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_R(INS_or, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_R(INS_and, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_R(INS_xor, EA_1BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R_I(INS_or, EA_2BYTE, REG_R10, REG_EAX, 10565, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_I(INS_or, EA_8BYTE, REG_R10, REG_EAX, 10, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_S(INS_or, EA_8BYTE, REG_R10, REG_EAX, 0, 1, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R(INS_neg, EA_2BYTE, REG_R10, REG_ECX, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R(INS_shl, EA_2BYTE, REG_R11, REG_EAX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R(INS_shl_1, EA_2BYTE, REG_R11, REG_EAX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_I(INS_shl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_I(INS_shl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_I(INS_rcr_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_I(INS_rcl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R(INS_inc, EA_2BYTE, REG_R11, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R(INS_dec, EA_2BYTE, REG_R11, REG_ECX, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R_R(INS_cmovo, EA_4BYTE, REG_R12, REG_R11, REG_EAX, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R_R(INS_imul, EA_4BYTE, REG_R12, REG_R11, REG_ECX, INS_OPTS_EVEX_nd); | ||
theEmitter->emitIns_R_R_S(INS_imul, EA_4BYTE, REG_R12, REG_R11, 0, 1, INS_OPTS_EVEX_nd); | ||
|
||
theEmitter->emitIns_R_R(INS_add, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_sub, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_and, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_or, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_xor, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_inc, EA_4BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_dec, EA_4BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_I(INS_add, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_sub, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_and, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_or, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_xor, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_S(INS_add, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_sub, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_and, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_or, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_xor, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R(INS_neg, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_shl, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_shl_1, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_rcr_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_I(INS_rcl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_R(INS_imul, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_imul, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_I(INS_imul_15, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R(INS_imulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_mulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_R(INS_tzcnt_evex, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_lzcnt_evex, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_popcnt_evex, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_S(INS_tzcnt_evex, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_lzcnt_evex, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_popcnt_evex, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_R_R(INS_add, EA_2BYTE, REG_R12, REG_R13, REG_R11, | ||
(insOpts)(INS_OPTS_EVEX_nf | INS_OPTS_EVEX_nd)); | ||
|
||
theEmitter->emitIns_R_R_R(INS_andn, EA_8BYTE, REG_R11, REG_R13, REG_R11, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R_R(INS_bextr, EA_8BYTE, REG_R11, REG_R13, REG_R11, INS_OPTS_EVEX_nf); | ||
|
||
theEmitter->emitIns_R_R(INS_blsi, EA_8BYTE, REG_R11, REG_R13, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_R(INS_blsmsk, EA_8BYTE, REG_R11, REG_R13, INS_OPTS_EVEX_nf); | ||
theEmitter->emitIns_R_S(INS_blsr, EA_8BYTE, REG_R11, 0, 1); | ||
} | ||
|
||
void CodeGen::genAmd64EmitterUnitTestsAvx10v2() | ||
|
@@ -11434,7 +11609,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) | |
if (barrierKind == BARRIER_FULL) | ||
{ | ||
instGen(INS_lock); | ||
GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0); | ||
GetEmitter()->emitIns_I_AR(INS_or_no_evex, EA_4BYTE, 0, REG_SPBASE, 0); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This general pattern is repeated quite a lot (with some variations), so I wonder if we should have a helper like the one I added for SIMD.
For example, we have `emitIns_SIMD_R_R_R`, which looks like: https://github.com/dotnet/runtime/blob/main/src/coreclr/jit/emitxarch.cpp#L8855-L8880 (other variations exist for handling things like memory operands or immediates, and higher-level helpers like `genHWIntrinsic_R_R_RM` exist for determining which of the variations to call between `emitIns_SIMD_R_R_R`, `emitIns_SIMD_R_R_A`, `emitIns_SIMD_R_R_C`, and `emitIns_SIMD_R_R_S`).
This lets us correctly represent any SIMD `dst = src1 op src2` operation given the raw registers and then internally handles the RMW consideration, so that the rest of codegen can remain simpler and more readable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, for example, it seems like we "could" have simplified this down to something like:
and then had this helper make the distinction of handling `APX`, `NDD`, inserting the `Mov` for the regular case, etc.
Presumably this would also make the diffs for other APX support much simpler as well, since we have fewer centralized helpers to update.