diff options
author | Robert Mustacchi <rm@joyent.com> | 2019-04-06 01:05:16 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2019-05-04 15:16:05 +0000 |
commit | e4f6ce7088a7dd335b9edf4774325f888692e5fb (patch) | |
tree | f8a4e8c8185cc189e509dbceb0522cafca3357b9 | |
parent | 5392d11ac74a493636fce7ed2e9598b095b68151 (diff) | |
download | illumos-joyent-e4f6ce7088a7dd335b9edf4774325f888692e5fb.tar.gz |
10893 Need support for new Cascade Lake Instructions
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Gordon Ross <gwr@nexenta.com>
-rw-r--r-- | usr/src/common/dis/i386/dis_tables.c | 11 | ||||
-rw-r--r-- | usr/src/common/elfcap/elfcap.c | 6 | ||||
-rw-r--r-- | usr/src/common/elfcap/elfcap.h | 4 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-test-utiltest.mf | 4 | ||||
-rw-r--r-- | usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.out | 52 | ||||
-rw-r--r-- | usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.s | 137 | ||||
-rw-r--r-- | usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.out | 52 | ||||
-rw-r--r-- | usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.s | 137 | ||||
-rw-r--r-- | usr/src/uts/common/sys/auxv_386.h | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/cpuid.c | 13 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/x86_archext.h | 5 |
11 files changed, 415 insertions, 12 deletions
diff --git a/usr/src/common/dis/i386/dis_tables.c b/usr/src/common/dis/i386/dis_tables.c index afb2b551b2..12a1112d8a 100644 --- a/usr/src/common/dis/i386/dis_tables.c +++ b/usr/src/common/dis/i386/dis_tables.c @@ -21,7 +21,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019, Joyent, Inc. */ /* @@ -1484,7 +1484,7 @@ const instable_t dis_opAVX62[256] = { /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, -/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [50] */ TNSZ("vpdpbusd",EVEX_RMrX,16),TNSZ("vpdpbusds",EVEX_RMrX,16),TNSZ("vpdpwssd",EVEX_RMrX,16),TNSZ("vpdpwssds",EVEX_RMrX,16), /* [54] */ TSd("vandp",EVEX_RMrX), TSd("vandnp",EVEX_RMrX), TSd("vorp",EVEX_RMrX), TSd("vxorp",EVEX_RMrX), /* [58] */ INVALID, INVALID, INVALID, INVALID, /* [5C] */ INVALID, INVALID, INVALID, INVALID, @@ -2664,6 +2664,12 @@ dtrace_evex_mnem_adjust(dis86_t *x, instable_t *dp, uint_t vex_W, uint_t evex_byte2) { #ifdef DIS_TEXT + /* No adjustments needed for VNNI instructions. */ + if (dp == &dis_opAVX62[0x50] || dp == &dis_opAVX62[0x51] || + dp == &dis_opAVX62[0x52] || dp == &dis_opAVX62[0x53]) { + return; + } + if (dp == &dis_opAVX62[0x7f] || /* vmovdq */ dp == &dis_opAVX62[0x6f]) { /* Aligned or Unaligned? */ @@ -2688,7 +2694,6 @@ dtrace_evex_mnem_adjust(dis86_t *x, instable_t *dp, uint_t vex_W, break; } } - } else { if (dp->it_avxsuf == AVS5Q) { (void) strlcat(x->d86_mnem, vex_W != 0 ? "q" : "d", diff --git a/usr/src/common/elfcap/elfcap.c b/usr/src/common/elfcap/elfcap.c index c57d54cb01..a5366da1ae 100644 --- a/usr/src/common/elfcap/elfcap.c +++ b/usr/src/common/elfcap/elfcap.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc. 
*/ /* LINTLIBRARY */ @@ -411,6 +411,10 @@ static const elfcap_desc_t hw2_386[ELFCAP_NUM_HW2_386] = { { /* 0x02000000 */ AV_386_2_CLZERO, STRDESC("AV_386_2_CLZERO"), STRDESC("CLZERO"), STRDESC("clzero") + }, + { /* 0x04000000 */ + AV_386_2_AVX512_VNNI, STRDESC("AV_386_2_AVX512_VNNI"), + STRDESC("AVX512_VNNI"), STRDESC("avx512_vnni") } }; diff --git a/usr/src/common/elfcap/elfcap.h b/usr/src/common/elfcap/elfcap.h index ff95763800..2a9d64f279 100644 --- a/usr/src/common/elfcap/elfcap.h +++ b/usr/src/common/elfcap/elfcap.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc. */ #ifndef _ELFCAP_DOT_H @@ -115,7 +115,7 @@ typedef enum { #define ELFCAP_NUM_SF1 3 #define ELFCAP_NUM_HW1_SPARC 17 #define ELFCAP_NUM_HW1_386 32 -#define ELFCAP_NUM_HW2_386 26 +#define ELFCAP_NUM_HW2_386 27 /* diff --git a/usr/src/pkg/manifests/system-test-utiltest.mf b/usr/src/pkg/manifests/system-test-utiltest.mf index 3010f4ea87..df9546a962 100644 --- a/usr/src/pkg/manifests/system-test-utiltest.mf +++ b/usr/src/pkg/manifests/system-test-utiltest.mf @@ -128,6 +128,8 @@ file path=opt/util-tests/tests/dis/i386/32.avx2.out mode=0444 file path=opt/util-tests/tests/dis/i386/32.avx2.s mode=0444 file path=opt/util-tests/tests/dis/i386/32.avx512.out mode=0444 file path=opt/util-tests/tests/dis/i386/32.avx512.s mode=0444 +file path=opt/util-tests/tests/dis/i386/32.avx512_vnni.out mode=0444 +file path=opt/util-tests/tests/dis/i386/32.avx512_vnni.s mode=0444 file path=opt/util-tests/tests/dis/i386/32.bmi1.out mode=0444 file path=opt/util-tests/tests/dis/i386/32.bmi1.s mode=0444 file path=opt/util-tests/tests/dis/i386/32.bmi2.out mode=0444 @@ -178,6 +180,8 @@ file path=opt/util-tests/tests/dis/i386/64.avx2.out mode=0444 file path=opt/util-tests/tests/dis/i386/64.avx2.s mode=0444 file path=opt/util-tests/tests/dis/i386/64.avx512.out mode=0444 file 
path=opt/util-tests/tests/dis/i386/64.avx512.s mode=0444 +file path=opt/util-tests/tests/dis/i386/64.avx512_vnni.out mode=0444 +file path=opt/util-tests/tests/dis/i386/64.avx512_vnni.s mode=0444 file path=opt/util-tests/tests/dis/i386/64.bmi1.out mode=0444 file path=opt/util-tests/tests/dis/i386/64.bmi1.s mode=0444 file path=opt/util-tests/tests/dis/i386/64.bmi2.out mode=0444 diff --git a/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.out b/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.out new file mode 100644 index 0000000000..34bc8604d5 --- /dev/null +++ b/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.out @@ -0,0 +1,52 @@ + libdis_test: 62 f2 75 08 50 d0 vpdpbusd %xmm0,%xmm1,%xmm2 + libdis_test+0x6: 62 f2 5d 28 50 eb vpdpbusd %ymm3,%ymm4,%ymm5 + libdis_test+0xc: 62 f2 45 48 50 c6 vpdpbusd %zmm6,%zmm7,%zmm0 + libdis_test+0x12: 62 f2 6d 09 50 d9 vpdpbusd %xmm1,%xmm2,%xmm3{%k1} + libdis_test+0x18: 62 f2 55 8a 50 f4 vpdpbusd %xmm4,%xmm5,%xmm6{%k2}{z} + libdis_test+0x1e: 62 f2 7d 2b 50 cf vpdpbusd %ymm7,%ymm0,%ymm1{%k3} + libdis_test+0x24: 62 f2 65 ac 50 e2 vpdpbusd %ymm2,%ymm3,%ymm4{%k4}{z} + libdis_test+0x2a: 62 f2 4d 4d 50 fd vpdpbusd %zmm5,%zmm6,%zmm7{%k5} + libdis_test+0x30: 62 f2 75 ce 50 d0 vpdpbusd %zmm0,%zmm1,%zmm2{%k6}{z} + libdis_test+0x36: 62 f2 65 48 50 a0 vpdpbusd 0x64(%eax),%zmm3,%zmm4 + 64 00 00 00 + libdis_test+0x40: 62 f2 55 48 50 b4 vpdpbusd 0x123456(%ebx,%ecx,8),%zmm5,%zmm6 + cb 56 34 12 00 + libdis_test+0x4b: 62 f2 75 08 51 d0 vpdpbusds %xmm0,%xmm1,%xmm2 + libdis_test+0x51: 62 f2 5d 28 51 eb vpdpbusds %ymm3,%ymm4,%ymm5 + libdis_test+0x57: 62 f2 45 48 51 c6 vpdpbusds %zmm6,%zmm7,%zmm0 + libdis_test+0x5d: 62 f2 6d 09 51 d9 vpdpbusds %xmm1,%xmm2,%xmm3{%k1} + libdis_test+0x63: 62 f2 55 8a 51 f4 vpdpbusds %xmm4,%xmm5,%xmm6{%k2}{z} + libdis_test+0x69: 62 f2 7d 2b 51 cf vpdpbusds %ymm7,%ymm0,%ymm1{%k3} + libdis_test+0x6f: 62 f2 65 ac 51 e2 vpdpbusds %ymm2,%ymm3,%ymm4{%k4}{z} + libdis_test+0x75: 62 f2 4d 4d 51 fd vpdpbusds 
%zmm5,%zmm6,%zmm7{%k5} + libdis_test+0x7b: 62 f2 75 ce 51 d0 vpdpbusds %zmm0,%zmm1,%zmm2{%k6}{z} + libdis_test+0x81: 62 f2 65 48 51 a0 vpdpbusds 0x64(%eax),%zmm3,%zmm4 + 64 00 00 00 + libdis_test+0x8b: 62 f2 55 48 51 b4 vpdpbusds 0x123456(%ebx,%ecx,8),%zmm5,%zmm6 + cb 56 34 12 00 + libdis_test+0x96: 62 f2 75 08 52 d0 vpdpwssd %xmm0,%xmm1,%xmm2 + libdis_test+0x9c: 62 f2 5d 28 52 eb vpdpwssd %ymm3,%ymm4,%ymm5 + libdis_test+0xa2: 62 f2 45 48 52 c6 vpdpwssd %zmm6,%zmm7,%zmm0 + libdis_test+0xa8: 62 f2 6d 09 52 d9 vpdpwssd %xmm1,%xmm2,%xmm3{%k1} + libdis_test+0xae: 62 f2 55 8a 52 f4 vpdpwssd %xmm4,%xmm5,%xmm6{%k2}{z} + libdis_test+0xb4: 62 f2 7d 2b 52 cf vpdpwssd %ymm7,%ymm0,%ymm1{%k3} + libdis_test+0xba: 62 f2 65 ac 52 e2 vpdpwssd %ymm2,%ymm3,%ymm4{%k4}{z} + libdis_test+0xc0: 62 f2 4d 4d 52 fd vpdpwssd %zmm5,%zmm6,%zmm7{%k5} + libdis_test+0xc6: 62 f2 75 ce 52 d0 vpdpwssd %zmm0,%zmm1,%zmm2{%k6}{z} + libdis_test+0xcc: 62 f2 65 48 52 a0 vpdpwssd 0x64(%eax),%zmm3,%zmm4 + 64 00 00 00 + libdis_test+0xd6: 62 f2 55 48 52 b4 vpdpwssd 0x123456(%ebx,%ecx,8),%zmm5,%zmm6 + cb 56 34 12 00 + libdis_test+0xe1: 62 f2 75 08 53 d0 vpdpwssds %xmm0,%xmm1,%xmm2 + libdis_test+0xe7: 62 f2 5d 28 53 eb vpdpwssds %ymm3,%ymm4,%ymm5 + libdis_test+0xed: 62 f2 45 48 53 c6 vpdpwssds %zmm6,%zmm7,%zmm0 + libdis_test+0xf3: 62 f2 6d 09 53 d9 vpdpwssds %xmm1,%xmm2,%xmm3{%k1} + libdis_test+0xf9: 62 f2 55 8a 53 f4 vpdpwssds %xmm4,%xmm5,%xmm6{%k2}{z} + libdis_test+0xff: 62 f2 7d 2b 53 cf vpdpwssds %ymm7,%ymm0,%ymm1{%k3} + libdis_test+0x105: 62 f2 65 ac 53 e2 vpdpwssds %ymm2,%ymm3,%ymm4{%k4}{z} + libdis_test+0x10b: 62 f2 4d 4d 53 fd vpdpwssds %zmm5,%zmm6,%zmm7{%k5} + libdis_test+0x111: 62 f2 75 ce 53 d0 vpdpwssds %zmm0,%zmm1,%zmm2{%k6}{z} + libdis_test+0x117: 62 f2 65 48 53 a0 vpdpwssds 0x64(%eax),%zmm3,%zmm4 + 64 00 00 00 + libdis_test+0x121: 62 f2 55 48 53 b4 vpdpwssds 0x123456(%ebx,%ecx,8),%zmm5,%zmm6 + cb 56 34 12 00 diff --git a/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.s 
b/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.s new file mode 100644 index 0000000000..c8b1889d7d --- /dev/null +++ b/usr/src/test/util-tests/tests/dis/i386/32.avx512_vnni.s @@ -0,0 +1,137 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * 32-bit AVX-512 VNNI instruction disassembly. + * + * The support for VNNI instructions in gas is not present in the commonly used + * versions of gas in illumos (it's not present in 2.26.1 or older). Support is + * present in at least 2.30 (though maybe earlier). Rather than have the test + * suite force a version upgrade, we instead encode each instruction as the + * corresponding series of .byte expressions. Each group of .byte lines follows + * a block of commented-out assembly instructions, and the .byte lines map + * one-to-one, in order, onto those instructions.
+ */ + +.text +.align 16 +.globl libdis_test +.type libdis_test, @function +libdis_test: + /* + * vpdpbusd %xmm0, %xmm1, %xmm2 + * vpdpbusd %ymm3, %ymm4, %ymm5 + * vpdpbusd %zmm6, %zmm7, %zmm0 + * vpdpbusd %xmm1, %xmm2, %xmm3{%k1} + * vpdpbusd %xmm4, %xmm5, %xmm6{%k2}{z} + * vpdpbusd %ymm7, %ymm0, %ymm1{%k3} + * vpdpbusd %ymm2, %ymm3, %ymm4{%k4}{z} + * vpdpbusd %zmm5, %zmm6, %zmm7{%k5} + * vpdpbusd %zmm0, %zmm1, %zmm2{%k6}{z} + * vpdpbusd 0x64(%eax), %zmm3, %zmm4 + * vpdpbusd 0x123456(%ebx, %ecx, 8), %zmm5, %zmm6 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x50, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x50, 0xeb + .byte 0x62, 0xf2, 0x45, 0x48, 0x50, 0xc6 + .byte 0x62, 0xf2, 0x6d, 0x09, 0x50, 0xd9 + .byte 0x62, 0xf2, 0x55, 0x8a, 0x50, 0xf4 + .byte 0x62, 0xf2, 0x7d, 0x2b, 0x50, 0xcf + .byte 0x62, 0xf2, 0x65, 0xac, 0x50, 0xe2 + .byte 0x62, 0xf2, 0x4d, 0x4d, 0x50, 0xfd + .byte 0x62, 0xf2, 0x75, 0xce, 0x50, 0xd0 + .byte 0x62, 0xf2, 0x65, 0x48, 0x50, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0xf2, 0x55, 0x48, 0x50, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + + /* + * vpdpbusds %xmm0, %xmm1, %xmm2 + * vpdpbusds %ymm3, %ymm4, %ymm5 + * vpdpbusds %zmm6, %zmm7, %zmm0 + * vpdpbusds %xmm1, %xmm2, %xmm3{%k1} + * vpdpbusds %xmm4, %xmm5, %xmm6{%k2}{z} + * vpdpbusds %ymm7, %ymm0, %ymm1{%k3} + * vpdpbusds %ymm2, %ymm3, %ymm4{%k4}{z} + * vpdpbusds %zmm5, %zmm6, %zmm7{%k5} + * vpdpbusds %zmm0, %zmm1, %zmm2{%k6}{z} + * vpdpbusds 0x64(%eax), %zmm3, %zmm4 + * vpdpbusds 0x123456(%ebx, %ecx, 8), %zmm5, %zmm6 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x51, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x51, 0xeb + .byte 0x62, 0xf2, 0x45, 0x48, 0x51, 0xc6 + .byte 0x62, 0xf2, 0x6d, 0x09, 0x51, 0xd9 + .byte 0x62, 0xf2, 0x55, 0x8a, 0x51, 0xf4 + .byte 0x62, 0xf2, 0x7d, 0x2b, 0x51, 0xcf + .byte 0x62, 0xf2, 0x65, 0xac, 0x51, 0xe2 + .byte 0x62, 0xf2, 0x4d, 0x4d, 0x51, 0xfd + .byte 0x62, 0xf2, 0x75, 0xce, 0x51, 0xd0 + .byte 0x62, 0xf2, 0x65, 0x48, 0x51, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0xf2, 0x55, 0x48, 0x51, 
0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + /* + * vpdpwssd %xmm0, %xmm1, %xmm2 + * vpdpwssd %ymm3, %ymm4, %ymm5 + * vpdpwssd %zmm6, %zmm7, %zmm0 + * vpdpwssd %xmm1, %xmm2, %xmm3{%k1} + * vpdpwssd %xmm4, %xmm5, %xmm6{%k2}{z} + * vpdpwssd %ymm7, %ymm0, %ymm1{%k3} + * vpdpwssd %ymm2, %ymm3, %ymm4{%k4}{z} + * vpdpwssd %zmm5, %zmm6, %zmm7{%k5} + * vpdpwssd %zmm0, %zmm1, %zmm2{%k6}{z} + * vpdpwssd 0x64(%eax), %zmm3, %zmm4 + * vpdpwssd 0x123456(%ebx, %ecx, 8), %zmm5, %zmm6 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x52, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x52, 0xeb + .byte 0x62, 0xf2, 0x45, 0x48, 0x52, 0xc6 + .byte 0x62, 0xf2, 0x6d, 0x09, 0x52, 0xd9 + .byte 0x62, 0xf2, 0x55, 0x8a, 0x52, 0xf4 + .byte 0x62, 0xf2, 0x7d, 0x2b, 0x52, 0xcf + .byte 0x62, 0xf2, 0x65, 0xac, 0x52, 0xe2 + .byte 0x62, 0xf2, 0x4d, 0x4d, 0x52, 0xfd + .byte 0x62, 0xf2, 0x75, 0xce, 0x52, 0xd0 + .byte 0x62, 0xf2, 0x65, 0x48, 0x52, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0xf2, 0x55, 0x48, 0x52, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + /* + * vpdpwssds %xmm0, %xmm1, %xmm2 + * vpdpwssds %ymm3, %ymm4, %ymm5 + * vpdpwssds %zmm6, %zmm7, %zmm0 + * vpdpwssds %xmm1, %xmm2, %xmm3{%k1} + * vpdpwssds %xmm4, %xmm5, %xmm6{%k2}{z} + * vpdpwssds %ymm7, %ymm0, %ymm1{%k3} + * vpdpwssds %ymm2, %ymm3, %ymm4{%k4}{z} + * vpdpwssds %zmm5, %zmm6, %zmm7{%k5} + * vpdpwssds %zmm0, %zmm1, %zmm2{%k6}{z} + * vpdpwssds 0x64(%eax), %zmm3, %zmm4 + * vpdpwssds 0x123456(%ebx, %ecx, 8), %zmm5, %zmm6 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x53, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x53, 0xeb + .byte 0x62, 0xf2, 0x45, 0x48, 0x53, 0xc6 + .byte 0x62, 0xf2, 0x6d, 0x09, 0x53, 0xd9 + .byte 0x62, 0xf2, 0x55, 0x8a, 0x53, 0xf4 + .byte 0x62, 0xf2, 0x7d, 0x2b, 0x53, 0xcf + .byte 0x62, 0xf2, 0x65, 0xac, 0x53, 0xe2 + .byte 0x62, 0xf2, 0x4d, 0x4d, 0x53, 0xfd + .byte 0x62, 0xf2, 0x75, 0xce, 0x53, 0xd0 + .byte 0x62, 0xf2, 0x65, 0x48, 0x53, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0xf2, 0x55, 0x48, 0x53, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 +.size libdis_test, 
[.-libdis_test] diff --git a/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.out b/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.out new file mode 100644 index 0000000000..322b8b461b --- /dev/null +++ b/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.out @@ -0,0 +1,52 @@ + libdis_test: 62 f2 75 08 50 d0 vpdpbusd %xmm0,%xmm1,%xmm2 + libdis_test+0x6: 62 f2 5d 28 50 eb vpdpbusd %ymm3,%ymm4,%ymm5 + libdis_test+0xc: 62 72 45 48 50 c6 vpdpbusd %zmm6,%zmm7,%zmm8 + libdis_test+0x12: 62 52 2d 09 50 d9 vpdpbusd %xmm9,%xmm10,%xmm11{%k1} + libdis_test+0x18: 62 52 15 8a 50 f4 vpdpbusd %xmm12,%xmm13,%xmm14{%k2}{z} + libdis_test+0x1e: 62 c2 7d 23 50 cf vpdpbusd %ymm15,%ymm0,%ymm17{%k3} + libdis_test+0x24: 62 a2 65 a4 50 e2 vpdpbusd %ymm18,%ymm3,%ymm20{%k4}{z} + libdis_test+0x2a: 62 a2 4d 45 50 fd vpdpbusd %zmm21,%zmm6,%zmm23{%k5} + libdis_test+0x30: 62 02 35 c6 50 d0 vpdpbusd %zmm24,%zmm9,%zmm26{%k6}{z} + libdis_test+0x36: 62 62 25 40 50 a0 vpdpbusd 0x64(%rax),%zmm11,%zmm28 + 64 00 00 00 + libdis_test+0x40: 62 62 15 40 50 b4 vpdpbusd 0x123456(%rbx,%rcx,8),%zmm13,%zmm30 + cb 56 34 12 00 + libdis_test+0x4b: 62 f2 75 08 51 d0 vpdpbusds %xmm0,%xmm1,%xmm2 + libdis_test+0x51: 62 f2 5d 28 51 eb vpdpbusds %ymm3,%ymm4,%ymm5 + libdis_test+0x57: 62 72 45 48 51 c6 vpdpbusds %zmm6,%zmm7,%zmm8 + libdis_test+0x5d: 62 52 2d 09 51 d9 vpdpbusds %xmm9,%xmm10,%xmm11{%k1} + libdis_test+0x63: 62 52 15 8a 51 f4 vpdpbusds %xmm12,%xmm13,%xmm14{%k2}{z} + libdis_test+0x69: 62 c2 7d 23 51 cf vpdpbusds %ymm15,%ymm0,%ymm17{%k3} + libdis_test+0x6f: 62 a2 65 a4 51 e2 vpdpbusds %ymm18,%ymm3,%ymm20{%k4}{z} + libdis_test+0x75: 62 a2 4d 45 51 fd vpdpbusds %zmm21,%zmm6,%zmm23{%k5} + libdis_test+0x7b: 62 02 35 c6 51 d0 vpdpbusds %zmm24,%zmm9,%zmm26{%k6}{z} + libdis_test+0x81: 62 62 25 40 51 a0 vpdpbusds 0x64(%rax),%zmm11,%zmm28 + 64 00 00 00 + libdis_test+0x8b: 62 62 15 40 51 b4 vpdpbusds 0x123456(%rbx,%rcx,8),%zmm13,%zmm30 + cb 56 34 12 00 + libdis_test+0x96: 62 f2 75 08 52 d0 vpdpwssd 
%xmm0,%xmm1,%xmm2 + libdis_test+0x9c: 62 f2 5d 28 52 eb vpdpwssd %ymm3,%ymm4,%ymm5 + libdis_test+0xa2: 62 72 45 48 52 c6 vpdpwssd %zmm6,%zmm7,%zmm8 + libdis_test+0xa8: 62 52 2d 09 52 d9 vpdpwssd %xmm9,%xmm10,%xmm11{%k1} + libdis_test+0xae: 62 52 15 8a 52 f4 vpdpwssd %xmm12,%xmm13,%xmm14{%k2}{z} + libdis_test+0xb4: 62 c2 7d 23 52 cf vpdpwssd %ymm15,%ymm0,%ymm17{%k3} + libdis_test+0xba: 62 a2 65 a4 52 e2 vpdpwssd %ymm18,%ymm3,%ymm20{%k4}{z} + libdis_test+0xc0: 62 a2 4d 45 52 fd vpdpwssd %zmm21,%zmm6,%zmm23{%k5} + libdis_test+0xc6: 62 02 35 c6 52 d0 vpdpwssd %zmm24,%zmm9,%zmm26{%k6}{z} + libdis_test+0xcc: 62 62 25 40 52 a0 vpdpwssd 0x64(%rax),%zmm11,%zmm28 + 64 00 00 00 + libdis_test+0xd6: 62 62 15 40 52 b4 vpdpwssd 0x123456(%rbx,%rcx,8),%zmm13,%zmm30 + cb 56 34 12 00 + libdis_test+0xe1: 62 f2 75 08 53 d0 vpdpwssds %xmm0,%xmm1,%xmm2 + libdis_test+0xe7: 62 f2 5d 28 53 eb vpdpwssds %ymm3,%ymm4,%ymm5 + libdis_test+0xed: 62 72 45 48 53 c6 vpdpwssds %zmm6,%zmm7,%zmm8 + libdis_test+0xf3: 62 52 2d 09 53 d9 vpdpwssds %xmm9,%xmm10,%xmm11{%k1} + libdis_test+0xf9: 62 52 15 8a 53 f4 vpdpwssds %xmm12,%xmm13,%xmm14{%k2}{z} + libdis_test+0xff: 62 c2 7d 23 53 cf vpdpwssds %ymm15,%ymm0,%ymm17{%k3} + libdis_test+0x105: 62 a2 65 a4 53 e2 vpdpwssds %ymm18,%ymm3,%ymm20{%k4}{z} + libdis_test+0x10b: 62 a2 4d 45 53 fd vpdpwssds %zmm21,%zmm6,%zmm23{%k5} + libdis_test+0x111: 62 02 35 c6 53 d0 vpdpwssds %zmm24,%zmm9,%zmm26{%k6}{z} + libdis_test+0x117: 62 62 25 40 53 a0 vpdpwssds 0x64(%rax),%zmm11,%zmm28 + 64 00 00 00 + libdis_test+0x121: 62 62 15 40 53 b4 vpdpwssds 0x123456(%rbx,%rcx,8),%zmm13,%zmm30 + cb 56 34 12 00 diff --git a/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.s b/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.s new file mode 100644 index 0000000000..5cd6a30c13 --- /dev/null +++ b/usr/src/test/util-tests/tests/dis/i386/64.avx512_vnni.s @@ -0,0 +1,137 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution 
License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * 64-bit AVX-512 VNNI instruction disassembly. + * + * The support for VNNI instructions in gas is not present in the commonly used + * versions of gas in illumos (it's not present in 2.26.1 or older). Support is + * present in at least 2.30 (though maybe earlier). Rather than have the test + * suite force a version upgrade, we instead encode each instruction as the + * corresponding series of .byte expressions. Each group of .byte lines follows + * a block of commented-out assembly instructions, and the .byte lines map + * one-to-one, in order, onto those instructions. + + */ + +.text +.align 16 +.globl libdis_test +.type libdis_test, @function +libdis_test: + /* + * vpdpbusd %xmm0, %xmm1, %xmm2 + * vpdpbusd %ymm3, %ymm4, %ymm5 + * vpdpbusd %zmm6, %zmm7, %zmm8 + * vpdpbusd %xmm9, %xmm10, %xmm11{%k1} + * vpdpbusd %xmm12, %xmm13, %xmm14{%k2}{z} + * vpdpbusd %ymm15, %ymm16, %ymm17{%k3} + * vpdpbusd %ymm18, %ymm19, %ymm20{%k4}{z} + * vpdpbusd %zmm21, %zmm22, %zmm23{%k5} + * vpdpbusd %zmm24, %zmm25, %zmm26{%k6}{z} + * vpdpbusd 0x64(%rax), %zmm27, %zmm28 + * vpdpbusd 0x123456(%rbx, %rcx, 8), %zmm29, %zmm30 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x50, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x50, 0xeb + .byte 0x62, 0x72, 0x45, 0x48, 0x50, 0xc6 + .byte 0x62, 0x52, 0x2d, 0x09, 0x50, 0xd9 + .byte 0x62, 0x52, 0x15, 0x8a, 0x50, 0xf4 + .byte 0x62, 0xc2, 0x7d, 0x23, 0x50, 0xcf + .byte 0x62, 0xa2, 0x65, 0xa4, 0x50, 0xe2 + .byte 0x62, 0xa2, 0x4d, 0x45, 0x50, 0xfd + .byte 0x62, 0x02, 0x35, 0xc6, 0x50, 0xd0 + .byte 0x62, 0x62, 0x25, 0x40, 0x50, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0x62, 0x15, 0x40, 0x50, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + /* + * vpdpbusds
%xmm0, %xmm1, %xmm2 + * vpdpbusds %ymm3, %ymm4, %ymm5 + * vpdpbusds %zmm6, %zmm7, %zmm8 + * vpdpbusds %xmm9, %xmm10, %xmm11{%k1} + * vpdpbusds %xmm12, %xmm13, %xmm14{%k2}{z} + * vpdpbusds %ymm15, %ymm16, %ymm17{%k3} + * vpdpbusds %ymm18, %ymm19, %ymm20{%k4}{z} + * vpdpbusds %zmm21, %zmm22, %zmm23{%k5} + * vpdpbusds %zmm24, %zmm25, %zmm26{%k6}{z} + * vpdpbusds 0x64(%rax), %zmm27, %zmm28 + * vpdpbusds 0x123456(%rbx, %rcx, 8), %zmm29, %zmm30 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x51, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x51, 0xeb + .byte 0x62, 0x72, 0x45, 0x48, 0x51, 0xc6 + .byte 0x62, 0x52, 0x2d, 0x09, 0x51, 0xd9 + .byte 0x62, 0x52, 0x15, 0x8a, 0x51, 0xf4 + .byte 0x62, 0xc2, 0x7d, 0x23, 0x51, 0xcf + .byte 0x62, 0xa2, 0x65, 0xa4, 0x51, 0xe2 + .byte 0x62, 0xa2, 0x4d, 0x45, 0x51, 0xfd + .byte 0x62, 0x02, 0x35, 0xc6, 0x51, 0xd0 + .byte 0x62, 0x62, 0x25, 0x40, 0x51, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0x62, 0x15, 0x40, 0x51, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + /* + * vpdpwssd %xmm0, %xmm1, %xmm2 + * vpdpwssd %ymm3, %ymm4, %ymm5 + * vpdpwssd %zmm6, %zmm7, %zmm8 + * vpdpwssd %xmm9, %xmm10, %xmm11{%k1} + * vpdpwssd %xmm12, %xmm13, %xmm14{%k2}{z} + * vpdpwssd %ymm15, %ymm16, %ymm17{%k3} + * vpdpwssd %ymm18, %ymm19, %ymm20{%k4}{z} + * vpdpwssd %zmm21, %zmm22, %zmm23{%k5} + * vpdpwssd %zmm24, %zmm25, %zmm26{%k6}{z} + * vpdpwssd 0x64(%rax), %zmm27, %zmm28 + * vpdpwssd 0x123456(%rbx, %rcx, 8), %zmm29, %zmm30 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x52, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x52, 0xeb + .byte 0x62, 0x72, 0x45, 0x48, 0x52, 0xc6 + .byte 0x62, 0x52, 0x2d, 0x09, 0x52, 0xd9 + .byte 0x62, 0x52, 0x15, 0x8a, 0x52, 0xf4 + .byte 0x62, 0xc2, 0x7d, 0x23, 0x52, 0xcf + .byte 0x62, 0xa2, 0x65, 0xa4, 0x52, 0xe2 + .byte 0x62, 0xa2, 0x4d, 0x45, 0x52, 0xfd + .byte 0x62, 0x02, 0x35, 0xc6, 0x52, 0xd0 + .byte 0x62, 0x62, 0x25, 0x40, 0x52, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0x62, 0x15, 0x40, 0x52, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 + + /* + * vpdpwssds %xmm0, %xmm1, 
%xmm2 + * vpdpwssds %ymm3, %ymm4, %ymm5 + * vpdpwssds %zmm6, %zmm7, %zmm8 + * vpdpwssds %xmm9, %xmm10, %xmm11{%k1} + * vpdpwssds %xmm12, %xmm13, %xmm14{%k2}{z} + * vpdpwssds %ymm15, %ymm16, %ymm17{%k3} + * vpdpwssds %ymm18, %ymm19, %ymm20{%k4}{z} + * vpdpwssds %zmm21, %zmm22, %zmm23{%k5} + * vpdpwssds %zmm24, %zmm25, %zmm26{%k6}{z} + * vpdpwssds 0x64(%rax), %zmm27, %zmm28 + * vpdpwssds 0x123456(%rbx, %rcx, 8), %zmm29, %zmm30 + */ + + .byte 0x62, 0xf2, 0x75, 0x08, 0x53, 0xd0 + .byte 0x62, 0xf2, 0x5d, 0x28, 0x53, 0xeb + .byte 0x62, 0x72, 0x45, 0x48, 0x53, 0xc6 + .byte 0x62, 0x52, 0x2d, 0x09, 0x53, 0xd9 + .byte 0x62, 0x52, 0x15, 0x8a, 0x53, 0xf4 + .byte 0x62, 0xc2, 0x7d, 0x23, 0x53, 0xcf + .byte 0x62, 0xa2, 0x65, 0xa4, 0x53, 0xe2 + .byte 0x62, 0xa2, 0x4d, 0x45, 0x53, 0xfd + .byte 0x62, 0x02, 0x35, 0xc6, 0x53, 0xd0 + .byte 0x62, 0x62, 0x25, 0x40, 0x53, 0xa0, 0x64, 0x00, 0x00, 0x00 + .byte 0x62, 0x62, 0x15, 0x40, 0x53, 0xb4, 0xcb, 0x56, 0x34, 0x12, 0x00 +.size libdis_test, [.-libdis_test] diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h index 151912fd03..a1183b9c6b 100644 --- a/usr/src/uts/common/sys/auxv_386.h +++ b/usr/src/uts/common/sys/auxv_386.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc. 
*/ #ifndef _SYS_AUXV_386_H @@ -112,9 +112,11 @@ extern "C" { #define AV_386_2_CLWB 0x00800000 /* CLWB insn */ #define AV_386_2_MONITORX 0x01000000 /* MONITORX insns */ #define AV_386_2_CLZERO 0x02000000 /* CLZERO */ +#define AV_386_2_AVX512_VNNI 0x04000000 /* AVX512_VNNI */ #define FMT_AV_386_2 \ - "\32clzero\31monitorx\030clwb\027clflushopt\026fsgsbase" \ + "\033avx512_vnni" \ + "\032clzero\031monitorx\030clwb\027clflushopt\026fsgsbase" \ "\025sha\024avx512_4fmaps\023avx512_4nniw\022avx512vpopcntdq" \ "\021avx512vbmi\020avx512vl\017avx512bw\016avx512cd" \ "\015avx512er\014avx512pf\013avx512ifma\012avx512dq\011avx512f" \ diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index c1928afa79..578dd3c06c 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -32,7 +32,7 @@ * Portions Copyright 2009 Advanced Micro Devices, Inc. */ /* - * Copyright (c) 2019, Joyent, Inc. + * Copyright 2019, Joyent, Inc. */ /* @@ -1035,7 +1035,8 @@ static char *x86_feature_names[NUM_X86_FEATURES] = { "clzero", "xop", "fma4", - "tbm" + "tbm", + "avx512_vnni" }; boolean_t @@ -2857,6 +2858,10 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) add_x86_feature(featureset, X86FSET_AVX512VBMI); if (cpi->cpi_std[7].cp_ecx & + CPUID_INTC_ECX_7_0_AVX512VNNI) + add_x86_feature(featureset, + X86FSET_AVX512VNNI); + if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) add_x86_feature(featureset, X86FSET_AVX512VPOPCDQ); @@ -3526,6 +3531,8 @@ cpuid_pass2(cpu_t *cpu) remove_x86_feature(x86_featureset, X86FSET_AVX512VBMI); remove_x86_feature(x86_featureset, + X86FSET_AVX512VNNI); + remove_x86_feature(x86_featureset, X86FSET_AVX512VPOPCDQ); remove_x86_feature(x86_featureset, X86FSET_AVX512NNIW); @@ -4340,6 +4347,8 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI) hwcap_flags_2 |= AV_386_2_AVX512VBMI; + if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI) + hwcap_flags_2 |= AV_386_2_AVX512_VNNI; if (*ecx_7 & 
CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ; diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index c20b45aff0..be50ef1292 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -27,7 +27,7 @@ * All rights reserved. */ /* - * Copyright (c) 2019, Joyent, Inc. + * Copyright 2019, Joyent, Inc. * Copyright 2012 Jens Elkner <jel+illumos@cs.uni-magdeburg.de> * Copyright 2012 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> * Copyright 2014 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> @@ -562,6 +562,7 @@ extern "C" { #define X86FSET_XOP 88 #define X86FSET_FMA4 89 #define X86FSET_TBM 90 +#define X86FSET_AVX512VNNI 91 /* * Intel Deep C-State invariant TSC in leaf 0x80000007. @@ -931,7 +932,7 @@ extern "C" { #if defined(_KERNEL) || defined(_KMEMUSER) -#define NUM_X86_FEATURES 91 +#define NUM_X86_FEATURES 92 extern uchar_t x86_featureset[]; extern void free_x86_featureset(void *featureset); |