diff options
Diffstat (limited to 'src/pkg/hash/crc32')
-rw-r--r-- | src/pkg/hash/crc32/crc32_amd64p32.s | 64 | ||||
-rw-r--r-- | src/pkg/hash/crc32/crc32_amd64x.go (renamed from src/pkg/hash/crc32/crc32_amd64.go) | 2 |
2 files changed, 66 insertions, 0 deletions
diff --git a/src/pkg/hash/crc32/crc32_amd64p32.s b/src/pkg/hash/crc32/crc32_amd64p32.s new file mode 100644 index 000000000..e34f20867 --- /dev/null +++ b/src/pkg/hash/crc32/crc32_amd64p32.s @@ -0,0 +1,64 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "../../../cmd/ld/textflag.h" + +// func castagnoliSSE42(crc uint32, p []byte) uint32 +TEXT ·castagnoliSSE42(SB),NOSPLIT,$0 + MOVL crc+0(FP), AX // CRC value + MOVL p+4(FP), SI // data pointer + MOVL p_len+8(FP), CX // len(p) + + NOTL AX + + /* If there's less than 8 bytes to process, we do it byte-by-byte. */ + CMPQ CX, $8 + JL cleanup + + /* Process individual bytes until the input is 8-byte aligned. */ +startup: + MOVQ SI, BX + ANDQ $7, BX + JZ aligned + + CRC32B (SI), AX + DECQ CX + INCQ SI + JMP startup + +aligned: + /* The input is now 8-byte aligned and we can process 8-byte chunks. */ + CMPQ CX, $8 + JL cleanup + + CRC32Q (SI), AX + ADDQ $8, SI + SUBQ $8, CX + JMP aligned + +cleanup: + /* We may have some bytes left over that we process one at a time. */ + CMPQ CX, $0 + JE done + + CRC32B (SI), AX + INCQ SI + DECQ CX + JMP cleanup + +done: + NOTL AX + MOVL AX, ret+16(FP) + RET + +// func haveSSE42() bool +TEXT ·haveSSE42(SB),NOSPLIT,$0 + XORQ AX, AX + INCL AX + CPUID + SHRQ $20, CX + ANDQ $1, CX + MOVB CX, ret+0(FP) + RET + diff --git a/src/pkg/hash/crc32/crc32_amd64.go b/src/pkg/hash/crc32/crc32_amd64x.go index b5bc6d3cf..b7e359930 100644 --- a/src/pkg/hash/crc32/crc32_amd64.go +++ b/src/pkg/hash/crc32/crc32_amd64x.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build amd64 amd64p32 + package crc32 // This file contains the code to call the SSE 4.2 version of the Castagnoli |