diff options
author | Robert Mustacchi <rm@joyent.com> | 2018-08-26 00:01:52 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2018-11-02 23:42:46 +0000 |
commit | bab4dcafbe412880696fcb05ab46ef0d48abb18f (patch) | |
tree | d4154c4e194411d31363b8993cca2a2eae7f3dfc | |
parent | e6151003c8efce19988229b39ba3dbb5551dc6f1 (diff) | |
download | illumos-joyent-bab4dcafbe412880696fcb05ab46ef0d48abb18f.tar.gz |
OS-5558 libmd should leverage SHA extensions
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Jason King <jason.king@joyent.com>
-rw-r--r-- | usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni | 31 | ||||
-rw-r--r-- | usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni.descrip | 1 | ||||
-rw-r--r-- | usr/src/common/crypto/sha1/amd64/sha1-ni.s | 319 | ||||
-rw-r--r-- | usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni | 32 | ||||
-rw-r--r-- | usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni.descrip | 1 | ||||
-rw-r--r-- | usr/src/common/crypto/sha2/amd64/sha256-ni.s | 378 | ||||
-rw-r--r-- | usr/src/lib/libmd/amd64/Makefile | 31 | ||||
-rw-r--r-- | usr/src/lib/libmd/amd64/capabilities/sha.cap | 29 | ||||
-rw-r--r-- | usr/src/lib/libmd/common/mapfile-vers | 19 |
9 files changed, 838 insertions, 3 deletions
diff --git a/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni b/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni new file mode 100644 index 0000000000..eb0dcb0454 --- /dev/null +++ b/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni @@ -0,0 +1,31 @@ +Intel SHA Extensions optimized implementation of a SHA-1 update function + +BSD LICENSE + +Copyright(c) 2015 Intel Corporation. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni.descrip b/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni.descrip new file mode 100644 index 0000000000..a4bedba9ef --- /dev/null +++ b/usr/src/common/crypto/sha1/amd64/THIRDPARTYLICENSE.sha1-ni.descrip @@ -0,0 +1 @@ +PORTIONS OF SHA1 FUNCTIONALITY diff --git a/usr/src/common/crypto/sha1/amd64/sha1-ni.s b/usr/src/common/crypto/sha1/amd64/sha1-ni.s new file mode 100644 index 0000000000..1b294a889e --- /dev/null +++ b/usr/src/common/crypto/sha1/amd64/sha1-ni.s @@ -0,0 +1,319 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley <sean.m.gulley@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * illumos uses this file under the terms of the BSD license. + * + * The following are a series of changes that we have made to this code: + * + * o Changed the include to be sys/asm_linkage.h + * o Use the sys/asm_linkage.h prototypes for assembly functions + * o Renamed the function from sha1_ni_transform to sha1_block_data_order to + * match the illumos name for the function + */ + +#include <sys/asm_linkage.h> + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define RSPSAVE %rax + +/* gcc conversion */ +#define FRAME_SIZE 32 /* space for 2x16 bytes */ + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha1_block_data_order(uint32_t *digest, const void *data, + uint32_t numBlocks) + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ +.text +.align 32 +ENTRY_NP(sha1_block_data_order) + mov %rsp, RSPSAVE + sub $FRAME_SIZE, %rsp + and $~0xF, %rsp + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* load initial hash values */ + pinsrd $3, 1*16(DIGEST_PTR), E0 + movdqu 0*16(DIGEST_PTR), ABCD + pand UPPER_WORD_MASK(%rip), E0 + pshufd $0x1B, ABCD, ABCD + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa E0, (0*16)(%rsp) + movdqa ABCD, (1*16)(%rsp) + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + pxor MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte (0*16)(%rsp), E0 + paddd (1*16)(%rsp), ABCD + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, ABCD, ABCD + movdqu ABCD, 0*16(DIGEST_PTR) + pextrd $3, E0, 1*16(DIGEST_PTR) + +.Ldone_hash: + mov RSPSAVE, %rsp + + ret +SET_SIZE(sha1_block_data_order) + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 +UPPER_WORD_MASK: + .octa 0xFFFFFFFF000000000000000000000000 diff --git a/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni b/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni new file mode 100644 index 0000000000..b16506427d --- /dev/null +++ b/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni @@ -0,0 +1,32 @@ +Intel SHA Extensions optimized implementation of a SHA-256 update function + +BSD LICENSE + +Copyright(c) 2015 Intel Corporation. +Copyright (c) 2018, Joyent, Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni.descrip b/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni.descrip new file mode 100644 index 0000000000..45352c606d --- /dev/null +++ b/usr/src/common/crypto/sha2/amd64/THIRDPARTYLICENSE.sha256-ni.descrip @@ -0,0 +1 @@ +PORTIONS OF SHA2 FUNCTIONALITY diff --git a/usr/src/common/crypto/sha2/amd64/sha256-ni.s b/usr/src/common/crypto/sha2/amd64/sha256-ni.s new file mode 100644 index 0000000000..338fcbf659 --- /dev/null +++ b/usr/src/common/crypto/sha2/amd64/sha256-ni.s @@ -0,0 +1,378 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley <sean.m.gulley@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * illumos uses this file under the terms of the BSD license. + * + * The following are a series of changes that we have made to this code: + * + * o Changed the include to be sys/asm_linkage.h. + * o Use the sys/asm_linkage.h prototypes for assembly functions. + * o Renamed the function from sha256_ni_transform to SHA256TransformBlocks to + * match the illumos name for the function. + * o The illumos SHA256_CTX does not have the digest as the first member of its + * context struct. As such, an offset has to be added to the digest argument + * to make sure that we get to the actual digest. + * o Update the function prototype block comment to reflect that we have are + * passing the context and not the direct digest. + */ + +#include <sys/asm_linkage.h> + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define SHA256CONSTANTS %rax + +#define MSG %xmm0 +#define STATE0 %xmm1 +#define STATE1 %xmm2 +#define MSGTMP0 %xmm3 +#define MSGTMP1 %xmm4 +#define MSGTMP2 %xmm5 +#define MSGTMP3 %xmm6 +#define MSGTMP4 %xmm7 + +#define SHUF_MASK %xmm8 + +#define ABEF_SAVE %xmm9 +#define CDGH_SAVE %xmm10 + +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void SHA256TransformBlocks(SHA256_CTX *ctx, const void *data, + uint32_t numBlocks); + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ + +.text +.align 32 +ENTRY_NP(SHA256TransformBlocks) + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* + * load initial hash values + * Need to reorder these appropriately + * DCBA, HGFE -> ABEF, CDGH + * + * Offset DIGEST_PTR to account for the algorithm in the context. + */ + addq $8, DIGEST_PTR + movdqu 0*16(DIGEST_PTR), STATE0 + movdqu 1*16(DIGEST_PTR), STATE1 + + pshufd $0xB1, STATE0, STATE0 /* CDAB */ + pshufd $0x1B, STATE1, STATE1 /* EFGH */ + movdqa STATE0, MSGTMP4 + palignr $8, STATE1, STATE0 /* ABEF */ + pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + lea K256(%rip), SHA256CONSTANTS + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa STATE0, ABEF_SAVE + movdqa STATE1, CDGH_SAVE + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP0 + paddd 0*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP1 + paddd 1*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP2 + paddd 2*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP3 + paddd 3*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 16-19 */ + movdqa MSGTMP0, MSG + paddd 4*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 20-23 */ + movdqa MSGTMP1, MSG + paddd 5*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 24-27 */ + movdqa MSGTMP2, MSG + paddd 6*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 28-31 */ + movdqa MSGTMP3, MSG + paddd 7*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 32-35 */ + movdqa MSGTMP0, MSG + paddd 8*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 36-39 */ + movdqa MSGTMP1, MSG + paddd 9*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 40-43 */ + movdqa MSGTMP2, MSG + paddd 10*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 44-47 */ + movdqa MSGTMP3, MSG + paddd 11*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 48-51 */ + movdqa MSGTMP0, MSG + paddd 12*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 52-55 */ + movdqa MSGTMP1, MSG + paddd 13*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 56-59 */ + movdqa MSGTMP2, MSG + paddd 14*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 60-63 */ + movdqa MSGTMP3, MSG + paddd 15*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Add current hash values with previously saved */ + paddd ABEF_SAVE, STATE0 + paddd CDGH_SAVE, STATE1 + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, STATE0, STATE0 /* FEBA */ + pshufd $0xB1, STATE1, STATE1 /* DCHG */ + movdqa STATE0, MSGTMP4 + pblendw $0xF0, STATE1, STATE0 /* DCBA */ + palignr $8, MSGTMP4, STATE1 /* HGFE */ + + movdqu STATE0, 0*16(DIGEST_PTR) + movdqu STATE1, 1*16(DIGEST_PTR) + +.Ldone_hash: + + ret +SET_SIZE(SHA256TransformBlocks) + +.section .rodata.cst256.K256, "aM", @progbits, 256 +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/usr/src/lib/libmd/amd64/Makefile b/usr/src/lib/libmd/amd64/Makefile index 8ac0b15a30..1f8143966f 100644 --- a/usr/src/lib/libmd/amd64/Makefile +++ b/usr/src/lib/libmd/amd64/Makefile @@ -34,13 +34,23 @@ include $(SRC)/lib/Makefile.rootfs include $(SRC)/lib/Makefile.lib.64 include ../Makefile.com +CAPFILES = pics/sha1-ni.o \ + pics/sha256-ni.o + EXTPICS = pics/md5_amd64.o \ - pics/sha1-x86_64.o \ pics/sha512-x86_64.o \ - pics/sha256-x86_64.o + pics/sha256-x86_64.o \ + pics/sha1-x86_64.o \ + $(CAPFILES:%.o=%.o.symcap) CLEANFILES += $(EXTPICS) \ - $(EXTPICS:pics/%.o=%.s) + $(EXTPICS:pics/%.o=%.s) \ + $(CAPFILES) \ + $(CAPFILES:%.o=%.o.objcap) \ + $(CAPFILES:%.o=%.o.symcap) + +pics/sha1-ni.o.objcap := CAPFILE = capabilities/sha.cap +pics/sha256-ni.o.objcap := CAPFILE = capabilities/sha.cap # This prevents <sys/asm_linkage.h> from including C source: AS_CPPFLAGS += -D_ASM @@ -57,6 +67,21 @@ pics/%.o: %.s $(COMPILE.s) -o $@ ${@F:.o=.s} $(POST_PROCESS_O) +pics/%.o: $(SRC)/common/crypto/sha1/amd64/%.s + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(SRC)/common/crypto/sha2/amd64/%.s + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o.objcap: pics/%.o + $(LD) -r -o $@ -M$(CAPFILE) -Breduce $< + $(POST_PROCESS_O) + +pics/%.o.symcap: pics/%.o.objcap + $(LD) -r -o $@ -z symbolcap $< + md5_amd64.s: $(COMDIR)/md5/amd64/md5_amd64.pl $(PERL) $? $@ diff --git a/usr/src/lib/libmd/amd64/capabilities/sha.cap b/usr/src/lib/libmd/amd64/capabilities/sha.cap new file mode 100644 index 0000000000..554cc50adf --- /dev/null +++ b/usr/src/lib/libmd/amd64/capabilities/sha.cap @@ -0,0 +1,29 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +$mapfile_version 2 + +CAPABILITY shani { + MACHINE = i86pc; + HW_1 = SSE2 SSSE3 SSE4.1; + # + # To avoid a chicken and egg problem with ld, we refer to the + # SHA hardware cap value by its numeric value rather than the name. + # This allows us to deal with the fact that we're almost + # certainly building this on a system before the capability was + # known. + # + HW_2 = 0x100000; +}; diff --git a/usr/src/lib/libmd/common/mapfile-vers b/usr/src/lib/libmd/common/mapfile-vers index f0b225235b..39578d4c6d 100644 --- a/usr/src/lib/libmd/common/mapfile-vers +++ b/usr/src/lib/libmd/common/mapfile-vers @@ -21,6 +21,7 @@ # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2013 Saso Kiselkov. All rights reserved. +# Copyright (c) 2018, Joyent, Inc. # # @@ -101,6 +102,24 @@ SYMBOL_VERSION SUNW_1.1 { SHA512Final { FLAGS = NODYNSORT }; SHA512Init; SHA512Update { FLAGS = NODYNSORT }; +}; + +SYMBOL_VERSION ILLUMOSprivate { + global: +$if _ELF64 && _x86 + # + # We use hardware capabilities to define multiple versions of + # the block processing functions for some of the SHA functions. + # Unfortunately, that requires these to be global symbols, even + # though we don't want them to be. As such, they remain in a + # private version and hopefully some day will be removed. These + # symbols are not exposed in any meaningful way in a header file + # so no application should actually end up requiring this + # section. + # + sha1_block_data_order; + SHA256TransformBlocks; +$endif local: *; }; |