update filter package
This commit is contained in:
parent
f05a80459e
commit
6e75eec88c
|
@ -1,3 +1,5 @@
|
|||
//+build amd64
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
|
@ -6,13 +8,13 @@ import (
|
|||
|
||||
func init() {
|
||||
if cpuid.CPU.SSE4() {
|
||||
encode = EncodeSSE4
|
||||
decode = DecodeSSE4
|
||||
encode = SSE4Encode
|
||||
decode = SSE4Decode
|
||||
} else if cpuid.CPU.SSE2() {
|
||||
encode = EncodeSSE2
|
||||
decode = DecodeSSE2
|
||||
encode = SSE2Encode
|
||||
decode = SSE2Decode
|
||||
} else {
|
||||
encode = EncodeCompat
|
||||
decode = DecodeCompat
|
||||
encode = X64Encode
|
||||
decode = X64Decode
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,8 +14,8 @@ type Filter struct {
|
|||
type codec func(fs *Filter, in *int32)
|
||||
|
||||
var (
|
||||
decode codec
|
||||
encode codec
|
||||
decode codec = CompatDecode
|
||||
encode codec = CompatEncode
|
||||
)
|
||||
|
||||
// New creates a Filter based on data and shift
|
||||
|
@ -42,7 +42,7 @@ func (f *Filter) Encode(in *int32) {
|
|||
encode(f, in)
|
||||
}
|
||||
|
||||
func decodeCompat(f *Filter, in *int32) {
|
||||
func CompatDecode(f *Filter, in *int32) {
|
||||
pa := f.dl[:]
|
||||
pb := f.qm[:]
|
||||
pm := f.dx[:]
|
||||
|
@ -92,7 +92,7 @@ func decodeCompat(f *Filter, in *int32) {
|
|||
pa[4] += pa[5]
|
||||
}
|
||||
|
||||
func encodeCompat(f *Filter, in *int32) {
|
||||
func CompatEncode(f *Filter, in *int32) {
|
||||
pa := f.dl[:]
|
||||
pb := f.qm[:]
|
||||
pm := f.dx[:]
|
||||
|
|
|
@ -7,31 +7,27 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
func DecodeSSE4(f *Filter, in *int32) { _HybridFilterDecodeSSE4(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func EncodeSSE4(f *Filter, in *int32) { _HybridFilterEncodeSSE4(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func DecodeSSE2(f *Filter, in *int32) { _HybridFilterDecodeSSE2(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func EncodeSSE2(f *Filter, in *int32) { _HybridFilterEncodeSSE2(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func DecodeCompat(f *Filter, in *int32) {
|
||||
_HybridFilterDecodeCompat(unsafe.Pointer(f), unsafe.Pointer(in))
|
||||
}
|
||||
func EncodeCompat(f *Filter, in *int32) {
|
||||
_HybridFilterEncodeCompat(unsafe.Pointer(f), unsafe.Pointer(in))
|
||||
}
|
||||
func SSE4Decode(f *Filter, in *int32) { sse4Decode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func SSE4Encode(f *Filter, in *int32) { sse4Encode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func SSE2Decode(f *Filter, in *int32) { sse2Decode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func SSE2Encode(f *Filter, in *int32) { sse2Encode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func X64Decode(f *Filter, in *int32) { x64Decode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
func X64Encode(f *Filter, in *int32) { x64Encode(unsafe.Pointer(f), unsafe.Pointer(in)) }
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterDecodeSSE4(fs, in unsafe.Pointer)
|
||||
func sse4Decode(fs, in unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterEncodeSSE4(fs, in unsafe.Pointer)
|
||||
func sse4Encode(fs, in unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterDecodeSSE2(fs, in unsafe.Pointer)
|
||||
func sse2Decode(fs, in unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterEncodeSSE2(fs, in unsafe.Pointer)
|
||||
func sse2Encode(fs, in unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterDecodeCompat(fs, in unsafe.Pointer)
|
||||
func x64Decode(fs, in unsafe.Pointer)
|
||||
|
||||
//go:noescape
|
||||
func _HybridFilterEncodeCompat(fs, in unsafe.Pointer)
|
||||
func x64Encode(fs, in unsafe.Pointer)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//+build !noasm !appengine
|
||||
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
|
||||
// AUTO-GENERATED BY GOASMC -- DO NOT EDIT
|
||||
|
||||
DATA LCDATA1<>+0x000(SB)/8, $0x0000000200000001
|
||||
DATA LCDATA1<>+0x008(SB)/8, $0x0000000400000002
|
||||
|
@ -7,7 +7,7 @@ DATA LCDATA1<>+0x010(SB)/8, $0xfffffffeffffffff
|
|||
DATA LCDATA1<>+0x018(SB)/8, $0xfffffffcfffffffe
|
||||
GLOBL LCDATA1<>(SB), 8, $32
|
||||
|
||||
TEXT ·_HybridFilterDecodeSSE4(SB), $0-16
|
||||
TEXT ·sse4Decode(SB), $0-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
@ -86,7 +86,7 @@ DATA LCDATA2<>+0x010(SB)/8, $0xfffffffeffffffff
|
|||
DATA LCDATA2<>+0x018(SB)/8, $0xfffffffcfffffffe
|
||||
GLOBL LCDATA2<>(SB), 8, $32
|
||||
|
||||
TEXT ·_HybridFilterDecodeSSE2(SB), $0-16
|
||||
TEXT ·sse4Encode(SB), $0-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
@ -118,57 +118,46 @@ LBB1_4:
|
|||
LONG $0x677f0f66; BYTE $0x20 // movdqa oword [rdi + 32], xmm4
|
||||
|
||||
LBB1_5:
|
||||
LONG $0xf26f0f66 // movdqa xmm6, xmm2
|
||||
LONG $0xf5f40f66 // pmuludq xmm6, xmm5
|
||||
LONG $0xfa700f66; BYTE $0xb1 // pshufd xmm7, xmm2, 177
|
||||
LONG $0xed700f66; BYTE $0xb1 // pshufd xmm5, xmm5, 177
|
||||
LONG $0xeff40f66 // pmuludq xmm5, xmm7
|
||||
LONG $0xed700f66; BYTE $0xa0 // pshufd xmm5, xmm5, 160
|
||||
LONG $0x0e3a0f66; WORD $0x33ee // pblendw xmm5, xmm6, 51
|
||||
LONG $0xf06f0f66 // movdqa xmm6, xmm0
|
||||
LONG $0xf4f40f66 // pmuludq xmm6, xmm4
|
||||
LONG $0xf8700f66; BYTE $0xb1 // pshufd xmm7, xmm0, 177
|
||||
LONG $0xe4700f66; BYTE $0xb1 // pshufd xmm4, xmm4, 177
|
||||
LONG $0xe7f40f66 // pmuludq xmm4, xmm7
|
||||
LONG $0xe4700f66; BYTE $0xa0 // pshufd xmm4, xmm4, 160
|
||||
LONG $0x0e3a0f66; WORD $0x33e6 // pblendw xmm4, xmm6, 51
|
||||
LONG $0xe5fe0f66 // paddd xmm4, xmm5
|
||||
LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238
|
||||
LONG $0xecfe0f66 // paddd xmm5, xmm4
|
||||
LONG $0xea7e0f66 // movd edx, xmm5
|
||||
LONG $0x163a0f66; WORD $0x01e8 // pextrd eax, xmm5, 1
|
||||
WORD $0x0144; BYTE $0xc0 // add eax, r8d
|
||||
WORD $0xd001 // add eax, edx
|
||||
LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4
|
||||
LONG $0xd86f0f66 // movdqa xmm3, xmm0
|
||||
LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4
|
||||
LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30
|
||||
LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI1_0] */
|
||||
LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI1_1] */
|
||||
LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3
|
||||
LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1
|
||||
LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
WORD $0x5789; BYTE $0x04 // mov dword [rdi + 4], edx
|
||||
WORD $0xf8d3 // sar eax, cl
|
||||
WORD $0x0601 // add dword [rsi], eax
|
||||
LONG $0x00a4878b; WORD $0x0000 // mov eax, dword [rdi + 164]
|
||||
WORD $0xc189 // mov ecx, eax
|
||||
WORD $0xd9f7 // neg ecx
|
||||
LONG $0x00a08f89; WORD $0x0000 // mov dword [rdi + 160], ecx
|
||||
LONG $0x00a88f8b; WORD $0x0000 // mov ecx, dword [rdi + 168]
|
||||
WORD $0xca89 // mov edx, ecx
|
||||
WORD $0xdaf7 // neg edx
|
||||
LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
LONG $0x00ac972b; WORD $0x0000 // sub edx, dword [rdi + 172]
|
||||
LONG $0x00a89789; WORD $0x0000 // mov dword [rdi + 168], edx
|
||||
WORD $0x368b // mov esi, dword [rsi]
|
||||
LONG $0x00acb789; WORD $0x0000 // mov dword [rdi + 172], esi
|
||||
WORD $0xca29 // sub edx, ecx
|
||||
LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx
|
||||
WORD $0xc229 // sub edx, eax
|
||||
LONG $0x00a09789; WORD $0x0000 // mov dword [rdi + 160], edx
|
||||
LONG $0x40380f66; BYTE $0xea // pmulld xmm5, xmm2
|
||||
LONG $0x40380f66; BYTE $0xe0 // pmulld xmm4, xmm0
|
||||
LONG $0xe5fe0f66 // paddd xmm4, xmm5
|
||||
LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238
|
||||
LONG $0xecfe0f66 // paddd xmm5, xmm4
|
||||
LONG $0xe87e0f66 // movd eax, xmm5
|
||||
LONG $0x3a0f4166; WORD $0xea16; BYTE $0x01 // pextrd r10d, xmm5, 1
|
||||
WORD $0x0145; BYTE $0xc2 // add r10d, r8d
|
||||
WORD $0x0141; BYTE $0xc2 // add r10d, eax
|
||||
LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4
|
||||
LONG $0xd86f0f66 // movdqa xmm3, xmm0
|
||||
LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4
|
||||
LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30
|
||||
LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI1_0] */
|
||||
LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI1_1] */
|
||||
LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3
|
||||
LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1
|
||||
LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0
|
||||
LONG $0xa4878b44; WORD $0x0000; BYTE $0x00 // mov r8d, dword [rdi + 164]
|
||||
WORD $0x8944; BYTE $0xc0 // mov eax, r8d
|
||||
WORD $0xd8f7 // neg eax
|
||||
LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax
|
||||
LONG $0xa88f8b44; WORD $0x0000; BYTE $0x00 // mov r9d, dword [rdi + 168]
|
||||
WORD $0x8944; BYTE $0xc8 // mov eax, r9d
|
||||
WORD $0xd8f7 // neg eax
|
||||
LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax
|
||||
WORD $0x068b // mov eax, dword [rsi]
|
||||
LONG $0x00ac872b; WORD $0x0000 // sub eax, dword [rdi + 172]
|
||||
LONG $0x00a88789; WORD $0x0000 // mov dword [rdi + 168], eax
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
LONG $0x00ac9789; WORD $0x0000 // mov dword [rdi + 172], edx
|
||||
WORD $0x2944; BYTE $0xc8 // sub eax, r9d
|
||||
LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax
|
||||
WORD $0x2944; BYTE $0xc0 // sub eax, r8d
|
||||
LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax
|
||||
WORD $0xd341; BYTE $0xfa // sar r10d, cl
|
||||
WORD $0x068b // mov eax, dword [rsi]
|
||||
WORD $0x2944; BYTE $0xd0 // sub eax, r10d
|
||||
WORD $0x0689 // mov dword [rsi], eax
|
||||
WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax
|
||||
RET
|
||||
|
||||
DATA LCDATA3<>+0x000(SB)/8, $0x0000000200000001
|
||||
|
@ -177,7 +166,7 @@ DATA LCDATA3<>+0x010(SB)/8, $0xfffffffeffffffff
|
|||
DATA LCDATA3<>+0x018(SB)/8, $0xfffffffcfffffffe
|
||||
GLOBL LCDATA3<>(SB), 8, $32
|
||||
|
||||
TEXT ·_HybridFilterEncodeSSE4(SB), $0-16
|
||||
TEXT ·sse2Decode(SB), $0-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
@ -209,46 +198,57 @@ LBB2_4:
|
|||
LONG $0x677f0f66; BYTE $0x20 // movdqa oword [rdi + 32], xmm4
|
||||
|
||||
LBB2_5:
|
||||
LONG $0x40380f66; BYTE $0xea // pmulld xmm5, xmm2
|
||||
LONG $0x40380f66; BYTE $0xe0 // pmulld xmm4, xmm0
|
||||
LONG $0xe5fe0f66 // paddd xmm4, xmm5
|
||||
LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238
|
||||
LONG $0xecfe0f66 // paddd xmm5, xmm4
|
||||
LONG $0xe87e0f66 // movd eax, xmm5
|
||||
LONG $0x3a0f4166; WORD $0xea16; BYTE $0x01 // pextrd r10d, xmm5, 1
|
||||
WORD $0x0145; BYTE $0xc2 // add r10d, r8d
|
||||
WORD $0x0141; BYTE $0xc2 // add r10d, eax
|
||||
LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4
|
||||
LONG $0xd86f0f66 // movdqa xmm3, xmm0
|
||||
LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4
|
||||
LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30
|
||||
LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI2_0] */
|
||||
LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI2_1] */
|
||||
LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3
|
||||
LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1
|
||||
LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0
|
||||
LONG $0xa4878b44; WORD $0x0000; BYTE $0x00 // mov r8d, dword [rdi + 164]
|
||||
WORD $0x8944; BYTE $0xc0 // mov eax, r8d
|
||||
WORD $0xd8f7 // neg eax
|
||||
LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax
|
||||
LONG $0xa88f8b44; WORD $0x0000; BYTE $0x00 // mov r9d, dword [rdi + 168]
|
||||
WORD $0x8944; BYTE $0xc8 // mov eax, r9d
|
||||
WORD $0xd8f7 // neg eax
|
||||
LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax
|
||||
WORD $0x068b // mov eax, dword [rsi]
|
||||
LONG $0x00ac872b; WORD $0x0000 // sub eax, dword [rdi + 172]
|
||||
LONG $0x00a88789; WORD $0x0000 // mov dword [rdi + 168], eax
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
LONG $0x00ac9789; WORD $0x0000 // mov dword [rdi + 172], edx
|
||||
WORD $0x2944; BYTE $0xc8 // sub eax, r9d
|
||||
LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax
|
||||
WORD $0x2944; BYTE $0xc0 // sub eax, r8d
|
||||
LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax
|
||||
WORD $0xd341; BYTE $0xfa // sar r10d, cl
|
||||
WORD $0x068b // mov eax, dword [rsi]
|
||||
WORD $0x2944; BYTE $0xd0 // sub eax, r10d
|
||||
WORD $0x0689 // mov dword [rsi], eax
|
||||
WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax
|
||||
LONG $0xf26f0f66 // movdqa xmm6, xmm2
|
||||
LONG $0xf5f40f66 // pmuludq xmm6, xmm5
|
||||
LONG $0xfa700f66; BYTE $0xb1 // pshufd xmm7, xmm2, 177
|
||||
LONG $0xed700f66; BYTE $0xb1 // pshufd xmm5, xmm5, 177
|
||||
LONG $0xeff40f66 // pmuludq xmm5, xmm7
|
||||
LONG $0xed700f66; BYTE $0xa0 // pshufd xmm5, xmm5, 160
|
||||
LONG $0x0e3a0f66; WORD $0x33ee // pblendw xmm5, xmm6, 51
|
||||
LONG $0xf06f0f66 // movdqa xmm6, xmm0
|
||||
LONG $0xf4f40f66 // pmuludq xmm6, xmm4
|
||||
LONG $0xf8700f66; BYTE $0xb1 // pshufd xmm7, xmm0, 177
|
||||
LONG $0xe4700f66; BYTE $0xb1 // pshufd xmm4, xmm4, 177
|
||||
LONG $0xe7f40f66 // pmuludq xmm4, xmm7
|
||||
LONG $0xe4700f66; BYTE $0xa0 // pshufd xmm4, xmm4, 160
|
||||
LONG $0x0e3a0f66; WORD $0x33e6 // pblendw xmm4, xmm6, 51
|
||||
LONG $0xe5fe0f66 // paddd xmm4, xmm5
|
||||
LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238
|
||||
LONG $0xecfe0f66 // paddd xmm5, xmm4
|
||||
LONG $0xea7e0f66 // movd edx, xmm5
|
||||
LONG $0x163a0f66; WORD $0x01e8 // pextrd eax, xmm5, 1
|
||||
WORD $0x0144; BYTE $0xc0 // add eax, r8d
|
||||
WORD $0xd001 // add eax, edx
|
||||
LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4
|
||||
LONG $0xd86f0f66 // movdqa xmm3, xmm0
|
||||
LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4
|
||||
LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30
|
||||
LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI2_0] */
|
||||
LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI2_1] */
|
||||
LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3
|
||||
LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1
|
||||
LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
WORD $0x5789; BYTE $0x04 // mov dword [rdi + 4], edx
|
||||
WORD $0xf8d3 // sar eax, cl
|
||||
WORD $0x0601 // add dword [rsi], eax
|
||||
LONG $0x00a4878b; WORD $0x0000 // mov eax, dword [rdi + 164]
|
||||
WORD $0xc189 // mov ecx, eax
|
||||
WORD $0xd9f7 // neg ecx
|
||||
LONG $0x00a08f89; WORD $0x0000 // mov dword [rdi + 160], ecx
|
||||
LONG $0x00a88f8b; WORD $0x0000 // mov ecx, dword [rdi + 168]
|
||||
WORD $0xca89 // mov edx, ecx
|
||||
WORD $0xdaf7 // neg edx
|
||||
LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx
|
||||
WORD $0x168b // mov edx, dword [rsi]
|
||||
LONG $0x00ac972b; WORD $0x0000 // sub edx, dword [rdi + 172]
|
||||
LONG $0x00a89789; WORD $0x0000 // mov dword [rdi + 168], edx
|
||||
WORD $0x368b // mov esi, dword [rsi]
|
||||
LONG $0x00acb789; WORD $0x0000 // mov dword [rdi + 172], esi
|
||||
WORD $0xca29 // sub edx, ecx
|
||||
LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx
|
||||
WORD $0xc229 // sub edx, eax
|
||||
LONG $0x00a09789; WORD $0x0000 // mov dword [rdi + 160], edx
|
||||
RET
|
||||
|
||||
DATA LCDATA4<>+0x000(SB)/8, $0x0000000200000001
|
||||
|
@ -257,7 +257,7 @@ DATA LCDATA4<>+0x010(SB)/8, $0xfffffffeffffffff
|
|||
DATA LCDATA4<>+0x018(SB)/8, $0xfffffffcfffffffe
|
||||
GLOBL LCDATA4<>(SB), 8, $32
|
||||
|
||||
TEXT ·_HybridFilterEncodeSSE2(SB), $0-16
|
||||
TEXT ·sse2Encode(SB), $0-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
@ -343,7 +343,7 @@ LBB3_5:
|
|||
WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax
|
||||
RET
|
||||
|
||||
TEXT ·_HybridFilterDecodeCompat(SB), $32-16
|
||||
TEXT ·x64Decode(SB), $32-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
@ -481,7 +481,7 @@ LBB4_6:
|
|||
SUBQ $8, SP
|
||||
RET
|
||||
|
||||
TEXT ·_HybridFilterEncodeCompat(SB), $32-16
|
||||
TEXT ·x64Encode(SB), $32-16
|
||||
|
||||
MOVQ fs+0(FP), DI
|
||||
MOVQ in+8(FP), SI
|
||||
|
|
|
@ -7,62 +7,62 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkEncodeSSE4(b *testing.B) {
|
||||
func BenchmarkSSE4Encode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
EncodeSSE4(f, &in)
|
||||
SSE4Encode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkEncodeSSE2(b *testing.B) {
|
||||
func BenchmarkSSE2Encode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
EncodeSSE2(f, &in)
|
||||
SSE2Encode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkEncodeCompatX64(b *testing.B) {
|
||||
func BenchmarkX64Encode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
EncodeCompat(f, &in)
|
||||
X64Encode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDecodeSSE4(b *testing.B) {
|
||||
func BenchmarkSSE4Decode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
DecodeSSE4(f, &in)
|
||||
SSE4Decode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDecodeSSE2(b *testing.B) {
|
||||
func BenchmarkSSE2Decode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
DecodeSSE2(f, &in)
|
||||
SSE2Decode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDecodeCompatX64(b *testing.B) {
|
||||
func BenchmarkX64Decode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
DecodeCompat(f, &in)
|
||||
X64Decode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
//+build !amd64
|
||||
|
||||
package filter
|
||||
|
||||
func DecodeCompat(f *Filter, in *int32) {
|
||||
decodeCompat(f, in)
|
||||
}
|
||||
|
||||
func EncodeCompat(f *Filter, in *int32) {
|
||||
encodeCompat(f, in)
|
||||
}
|
|
@ -4,22 +4,22 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkEncodeCompat(b *testing.B) {
|
||||
func BenchmarkCompatEncode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
encodeCompat(f, &in)
|
||||
CompatEncode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDecodeCompat(b *testing.B) {
|
||||
func BenchmarkCompatDecode(b *testing.B) {
|
||||
f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8)
|
||||
var in int32
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
decodeCompat(f, &in)
|
||||
CompatDecode(f, &in)
|
||||
}
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue