Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add detection for zen 5 #56967

Merged
merged 3 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/features_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
#else
#define JL_X86_64ONLY_VER(x) x
#endif
// This code is similar to LLVM's host CPU detection (linked below), so the bits used there can serve as a reference:
// https://github.com/llvm/llvm-project/blob/3f7905733820851bc4f65cb4af693c3101cbf20d/llvm/lib/TargetParser/Host.cpp#L1257

// Each feature's bit value is an index into the `features` bit array.
// The index is encoded as 32*i + j, where i is the index of the 32-bit word
// in the array and j is the bit position within that word.
// The register that each word i is read from is listed in _get_host_cpu.

// X86 features definition
// EAX=1: ECX
Expand Down Expand Up @@ -79,6 +86,7 @@ JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0)
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
Expand Down Expand Up @@ -110,10 +118,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)

// EAX=7,ECX=1: EAX
JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000)
JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000)
JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000)
JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000)
JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000)
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000)
JL_FEATURE_DEF_NAME(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16")
JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000)
JL_FEATURE_DEF(avxifma, 32 * 9 + 23, 160000)

// EAX=7,ECX=1: EBX
JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000)
JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000)
JL_FEATURE_DEF_NAME(amx_complex, 32 * 10 + 8, 170000, "amx-complex")
JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000)
JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000)
JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000)
// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and its mess?
// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000)

// EAX=0x14,ECX=0: EBX
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0)

#undef JL_X86_64ONLY_VER
35 changes: 32 additions & 3 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ enum class CPU : uint32_t {
amd_znver2,
amd_znver3,
amd_znver4,
amd_znver5,
};

static constexpr size_t feature_sz = 11;
static constexpr size_t feature_sz = 12;
static constexpr FeatureName feature_names[] = {
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
Expand Down Expand Up @@ -141,6 +142,10 @@ static constexpr FeatureDep deps[] = {
{vpclmulqdq, avx},
{vpclmulqdq, pclmul},
{avxvnni, avx2},
{avxvnniint8, avx2},
{avxvnniint16, avx2},
{avxifma, avx2},
{avxneconvert, avx2},
{avx512f, avx2},
{avx512dq, avx512f},
{avx512ifma, avx512f},
Expand All @@ -159,13 +164,18 @@ static constexpr FeatureDep deps[] = {
{avx512fp16, avx512vl},
{amx_int8, amx_tile},
{amx_bf16, amx_tile},
{amx_fp16, amx_tile},
{amx_complex, amx_tile},
{sse4a, sse3},
{xop, fma4},
{fma4, avx},
{fma4, sse4a},
{xsaveopt, xsave},
{xsavec, xsave},
{xsaves, xsave},
{sha512, avx2},
{sm3, avx},
{sm4, avx2},
};

// We require cx16 on 64bit by default. This can be overwritten with `-cx16`
Expand Down Expand Up @@ -236,6 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
// Zen 5 feature set: everything in znver4 plus the features listed here.
// (avxvnni was previously listed twice; the redundant entry is dropped.)
constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi);

}

Expand Down Expand Up @@ -298,6 +309,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2},
{"znver3", CPU::amd_znver3, CPU::amd_znver2, 120000, Feature::znver3},
{"znver4", CPU::amd_znver4, CPU::amd_znver3, 160000, Feature::znver4},
{"znver5", CPU::amd_znver5, CPU::amd_znver4, 190000, Feature::znver5},
};
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);

Expand Down Expand Up @@ -575,6 +587,9 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
return CPU::amd_znver4;
}
return CPU::amd_znver3; // fallback
case 26:
// if (model <= 0x77)
return CPU::amd_znver5;
}
}

Expand Down Expand Up @@ -660,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
int32_t info7[4];
jl_cpuidex(info7, 7, 1);
features[9] = info7[0];
features[10] = info7[1];
}
if (maxleaf >= 0x14) {
int32_t info14[4];
jl_cpuidex(info14, 0x14, 0);
features[10] = info14[1];
features[11] = info14[1];
}

// Fix up AVX bits to account for OS support and match LLVM model
Expand Down Expand Up @@ -705,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
else {
cpu = uint32_t(CPU::generic);
}

/* Feature bits to register map
features[0] = leaf 1 ecx
features[1] = leaf 1 edx
features[2] = leaf 7 subleaf 0 ebx
features[3] = leaf 7 subleaf 0 ecx
features[4] = leaf 7 subleaf 0 edx
features[5] = leaf 0x80000001 ecx
features[6] = leaf 0x80000001 edx
features[7] = leaf 0xd subleaf 1 eax
features[8] = leaf 0x80000008 ebx
features[9] = leaf 7 subleaf 1 eax
features[10] = leaf 7 subleaf 1 ebx
features[11] = leaf 0x14 subleaf 0 ebx
*/
return std::make_pair(cpu, features);
}

Expand Down
Loading