Skip to content

Commit

Permalink
Add detection for Intel Advanced Matrix Extensions (AMX) instructions (
Browse files Browse the repository at this point in the history
…pytorch#231)

Tested using the Intel Software Development Emulator (SDE): https://www.intel.com/content/www/us/en/download/684897/intel-software-development-emulator.html

Test scripts:
```
# Build the tools; the loop below runs ./build/local/isa-info.
bash scripts/local-build.sh

# OPTIONS[i] is the SDE chip-selection flag used for the platform whose
# display name is PLATFORMS[i]; the two arrays are index-aligned.
ISAS=()
OPTIONS=()
PLATFORMS=()

OPTIONS+=(-quark); PLATFORMS+=("Quark")
OPTIONS+=(-p4); PLATFORMS+=("Pentium4")
OPTIONS+=(-p4p); PLATFORMS+=("Pentium4 Prescott")
OPTIONS+=(-mrm); PLATFORMS+=("Merom")
OPTIONS+=(-pnr); PLATFORMS+=("Penryn")
OPTIONS+=(-nhm); PLATFORMS+=("Nehalem")
OPTIONS+=(-wsm); PLATFORMS+=("Westmere")
OPTIONS+=(-snb); PLATFORMS+=("Sandy Bridge")
OPTIONS+=(-ivb); PLATFORMS+=("Ivy Bridge")
OPTIONS+=(-hsw); PLATFORMS+=("Haswell")
OPTIONS+=(-bdw); PLATFORMS+=("Broadwell")
OPTIONS+=(-slt); PLATFORMS+=("Saltwell")
OPTIONS+=(-slm); PLATFORMS+=("Silvermont")
OPTIONS+=(-glm); PLATFORMS+=("Goldmont")
OPTIONS+=(-glp); PLATFORMS+=("Goldmont Plus")
OPTIONS+=(-tnt); PLATFORMS+=("Tremont")
OPTIONS+=(-snr); PLATFORMS+=("Snow Ridge")
OPTIONS+=(-skl); PLATFORMS+=("Skylake")
OPTIONS+=(-cnl); PLATFORMS+=("Cannon Lake")
OPTIONS+=(-icl); PLATFORMS+=("Ice Lake")
OPTIONS+=(-skx); PLATFORMS+=("Skylake server")
OPTIONS+=(-clx); PLATFORMS+=("Cascade Lake")
OPTIONS+=(-cpx); PLATFORMS+=("Cooper Lake")
OPTIONS+=(-icx); PLATFORMS+=("Ice Lake server")
OPTIONS+=(-knl); PLATFORMS+=("Knights landing")
OPTIONS+=(-knm); PLATFORMS+=("Knights mill")
OPTIONS+=(-tgl); PLATFORMS+=("Tiger Lake")
OPTIONS+=(-adl); PLATFORMS+=("Alder Lake")
OPTIONS+=(-mtl); PLATFORMS+=("Meteor Lake")
OPTIONS+=(-rpl); PLATFORMS+=("Raptor Lake")
OPTIONS+=(-spr); PLATFORMS+=("Sapphire Rapids")
OPTIONS+=(-gnr); PLATFORMS+=("Granite Rapids")
OPTIONS+=(-gnr256); PLATFORMS+=("Granite Rapids (AVX10.1 / 256VL)")
OPTIONS+=(-srf); PLATFORMS+=("Sierra Forest")
OPTIONS+=(-arl); PLATFORMS+=("Arrow Lake")
OPTIONS+=(-lnl); PLATFORMS+=("Lunar Lake")
OPTIONS+=(-future); PLATFORMS+=("Future chip")

# AMX feature labels to look for in the isa-info output.
ISAS+=("AMXBF16")
ISAS+=("AMXTILE")
ISAS+=("AMXINT8")
ISAS+=("AMXFP16")

# Path to the SDE launcher. Export SDE_BIN to override the default location.
SDE_BIN="${SDE_BIN:-/home/mingfeim/packages/sde-external-9.33.0-2024-01-07-lin/sde}"

for I in "${!PLATFORMS[@]}"; do
  echo "${PLATFORMS[$I]}"
  for ISA in "${ISAS[@]}"; do
    # isa-info prints the labels with an underscore (e.g. "AMX_BF16:"), so a
    # literal "AMXBF16" would never match. Accept the label with or without
    # the underscore after "AMX"; the trailing colon anchors on the label.
    "${SDE_BIN}" "${OPTIONS[$I]}" -- ./build/local/isa-info | grep -E "AMX_?${ISA#AMX}:"
  done
done
```


Results:
```
Quark
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file
Pentium4
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file
SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file
Pentium4 Prescott
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Merom
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Penryn
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Nehalem
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Westmere
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Sandy Bridge
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Ivy Bridge
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Haswell
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Broadwell
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Saltwell
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Silvermont
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Goldmont
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Goldmont Plus
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Tremont
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Snow Ridge
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Skylake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Cannon Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Ice Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Skylake server
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Cascade Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Cooper Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Ice Lake server
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Knights landing
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Knights mill
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Tiger Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Alder Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Meteor Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Raptor Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Sapphire Rapids
        AMXBF16: yes
        AMXTILE: yes
        AMXINT8: yes
        AMXFP16: no
Granite Rapids
        AMXBF16: yes
        AMXTILE: yes
        AMXINT8: yes
        AMXFP16: yes
Granite Rapids (AVX10.1 / 256VL)
        AMXBF16: yes
        AMXTILE: yes
        AMXINT8: yes
        AMXFP16: yes
Sierra Forest
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Arrow Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Lunar Lake
        AMXBF16: no
        AMXTILE: no
        AMXINT8: no
        AMXFP16: no
Future chip
        AMXBF16: yes
        AMXTILE: yes
        AMXINT8: yes
        AMXFP16: yes
```
  • Loading branch information
mingfeima authored Mar 28, 2024
1 parent 6543fec commit f42f5ea
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 0 deletions.
56 changes: 56 additions & 0 deletions include/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,10 @@ struct cpuinfo_x86_isa {
bool avx512vp2intersect;
bool avx512_4vnniw;
bool avx512_4fmaps;
bool amx_bf16;
bool amx_tile;
bool amx_int8;
bool amx_fp16;
bool hle;
bool rtm;
bool xtest;
Expand Down Expand Up @@ -1328,6 +1332,58 @@ static inline bool cpuinfo_has_x86_avx512_4fmaps(void) {
#endif
}

/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection
*
* I. AMX is an extension to the x86 ISA for operating on matrices. It consists of:
* 1) Two-dimensional registers (tiles), which hold sub-matrices of larger matrices in memory
* 2) An accelerator called Tile Matrix Multiply (TMUL), which provides the instructions that operate on tiles
*
* II. Platforms that support AMX:
* +-----------------+-----+----------+----------+----------+----------+
* | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 |
* +-----------------+-----+----------+----------+----------+----------+
* | Sapphire Rapids | 4th | YES | YES | YES | NO |
* +-----------------+-----+----------+----------+----------+----------+
* | Emerald Rapids | 5th | YES | YES | YES | NO |
* +-----------------+-----+----------+----------+----------+----------+
* | Granite Rapids | 6th | YES | YES | YES | YES |
* +-----------------+-----+----------+----------+----------+----------+
*
* Reference: https://www.intel.com/content/www/us/en/products/docs
* /accelerator-engines/advanced-matrix-extensions/overview.html
*/
/*
 * Reports the amx_bf16 flag stored in cpuinfo_isa.
 * Yields false when neither CPUINFO_ARCH_X86 nor CPUINFO_ARCH_X86_64 is set.
 */
static inline bool cpuinfo_has_x86_amx_bf16(void) {
	bool supported = false;
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
	/* Only x86 builds consult the ISA flags. */
	supported = cpuinfo_isa.amx_bf16;
#endif
	return supported;
}

/*
 * Reports the amx_tile flag stored in cpuinfo_isa.
 * Yields false when neither CPUINFO_ARCH_X86 nor CPUINFO_ARCH_X86_64 is set.
 */
static inline bool cpuinfo_has_x86_amx_tile(void) {
	bool supported = false;
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
	/* Only x86 builds consult the ISA flags. */
	supported = cpuinfo_isa.amx_tile;
#endif
	return supported;
}

/*
 * Reports the amx_int8 flag stored in cpuinfo_isa.
 * Yields false when neither CPUINFO_ARCH_X86 nor CPUINFO_ARCH_X86_64 is set.
 */
static inline bool cpuinfo_has_x86_amx_int8(void) {
	bool supported = false;
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
	/* Only x86 builds consult the ISA flags. */
	supported = cpuinfo_isa.amx_int8;
#endif
	return supported;
}

/*
 * Reports the amx_fp16 flag stored in cpuinfo_isa.
 * Yields false when neither CPUINFO_ARCH_X86 nor CPUINFO_ARCH_X86_64 is set.
 */
static inline bool cpuinfo_has_x86_amx_fp16(void) {
	bool supported = false;
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
	/* Only x86 builds consult the ISA flags. */
	supported = cpuinfo_isa.amx_fp16;
#endif
	return supported;
}

static inline bool cpuinfo_has_x86_hle(void) {
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
return cpuinfo_isa.hle;
Expand Down
24 changes: 24 additions & 0 deletions src/x86/isa.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,30 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
*/
isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));

/*
* AMX_BF16 instructions:
* - Intel: edx[bit 22] in structured feature info (ecx = 0).
*/
isa.amx_bf16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00400000));

/*
* AMX_TILE instructions:
* - Intel: edx[bit 24] in structured feature info (ecx = 0).
*/
isa.amx_tile = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x01000000));

/*
* AMX_INT8 instructions:
* - Intel: edx[bit 25] in structured feature info (ecx = 0).
*/
isa.amx_int8 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x02000000));

/*
* AMX_FP16 instructions:
* - Intel: eax[bit 21] in structured feature info (ecx = 1).
*/
isa.amx_fp16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00200000));

/*
* HLE instructions:
* - Intel: ebx[bit 4] in structured feature info (ecx = 0).
Expand Down
4 changes: 4 additions & 0 deletions tools/isa-info.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ int main(int argc, char** argv) {
printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");
printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no");
printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? "yes" : "no");
printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? "yes" : "no");
printf("\tAMX_FP16: %s\n", cpuinfo_has_x86_amx_fp16() ? "yes" : "no");
printf("\tAVXVNNI: %s\n", cpuinfo_has_x86_avxvnni() ? "yes" : "no");

printf("Multi-threading extensions:\n");
Expand Down

0 comments on commit f42f5ea

Please sign in to comment.