From 6e75eec88c168a0cbe2237b683d5a872ce0bea76 Mon Sep 17 00:00:00 2001 From: Markus Tzoe Date: Wed, 23 Aug 2017 15:32:30 +0800 Subject: [PATCH] update filter package --- filter/cpu.go | 14 +- filter/filter.go | 8 +- filter/filter_amd64.go | 28 ++- filter/filter_amd64.s | 196 +++++++++--------- filter/filter_amd64_test.go | 24 +-- filter/filter_compat.go | 11 - .../{filter_compat_test.go => filter_test.go} | 8 +- 7 files changed, 138 insertions(+), 151 deletions(-) delete mode 100644 filter/filter_compat.go rename filter/{filter_compat_test.go => filter_test.go} (68%) diff --git a/filter/cpu.go b/filter/cpu.go index 4aa2673..c1f5ce7 100644 --- a/filter/cpu.go +++ b/filter/cpu.go @@ -1,3 +1,5 @@ +//+build amd64 + package filter import ( @@ -6,13 +8,13 @@ import ( func init() { if cpuid.CPU.SSE4() { - encode = EncodeSSE4 - decode = DecodeSSE4 + encode = SSE4Encode + decode = SSE4Decode } else if cpuid.CPU.SSE2() { - encode = EncodeSSE2 - decode = DecodeSSE2 + encode = SSE2Encode + decode = SSE2Decode } else { - encode = EncodeCompat - decode = DecodeCompat + encode = X64Encode + decode = X64Decode } } diff --git a/filter/filter.go b/filter/filter.go index a987944..7410a4c 100644 --- a/filter/filter.go +++ b/filter/filter.go @@ -14,8 +14,8 @@ type Filter struct { type codec func(fs *Filter, in *int32) var ( - decode codec - encode codec + decode codec = CompatDecode + encode codec = CompatEncode ) // New creates a Filter based on data and shift @@ -42,7 +42,7 @@ func (f *Filter) Encode(in *int32) { encode(f, in) } -func decodeCompat(f *Filter, in *int32) { +func CompatDecode(f *Filter, in *int32) { pa := f.dl[:] pb := f.qm[:] pm := f.dx[:] @@ -92,7 +92,7 @@ func decodeCompat(f *Filter, in *int32) { pa[4] += pa[5] } -func encodeCompat(f *Filter, in *int32) { +func CompatEncode(f *Filter, in *int32) { pa := f.dl[:] pb := f.qm[:] pm := f.dx[:] diff --git a/filter/filter_amd64.go b/filter/filter_amd64.go index 34a0701..0a4002a 100644 --- a/filter/filter_amd64.go +++ b/filter/filter_amd64.go @@ -7,31 +7,27 @@ import ( "unsafe" ) -func DecodeSSE4(f *Filter, in *int32) { _HybridFilterDecodeSSE4(unsafe.Pointer(f), unsafe.Pointer(in)) } -func EncodeSSE4(f *Filter, in *int32) { _HybridFilterEncodeSSE4(unsafe.Pointer(f), unsafe.Pointer(in)) } -func DecodeSSE2(f *Filter, in *int32) { _HybridFilterDecodeSSE2(unsafe.Pointer(f), unsafe.Pointer(in)) } -func EncodeSSE2(f *Filter, in *int32) { _HybridFilterEncodeSSE2(unsafe.Pointer(f), unsafe.Pointer(in)) } -func DecodeCompat(f *Filter, in *int32) { - _HybridFilterDecodeCompat(unsafe.Pointer(f), unsafe.Pointer(in)) -} -func EncodeCompat(f *Filter, in *int32) { - _HybridFilterEncodeCompat(unsafe.Pointer(f), unsafe.Pointer(in)) -} +func SSE4Decode(f *Filter, in *int32) { sse4Decode(unsafe.Pointer(f), unsafe.Pointer(in)) } +func SSE4Encode(f *Filter, in *int32) { sse4Encode(unsafe.Pointer(f), unsafe.Pointer(in)) } +func SSE2Decode(f *Filter, in *int32) { sse2Decode(unsafe.Pointer(f), unsafe.Pointer(in)) } +func SSE2Encode(f *Filter, in *int32) { sse2Encode(unsafe.Pointer(f), unsafe.Pointer(in)) } +func X64Decode(f *Filter, in *int32) { x64Decode(unsafe.Pointer(f), unsafe.Pointer(in)) } +func X64Encode(f *Filter, in *int32) { x64Encode(unsafe.Pointer(f), unsafe.Pointer(in)) } //go:noescape -func _HybridFilterDecodeSSE4(fs, in unsafe.Pointer) +func sse4Decode(fs, in unsafe.Pointer) //go:noescape -func _HybridFilterEncodeSSE4(fs, in unsafe.Pointer) +func sse4Encode(fs, in unsafe.Pointer) //go:noescape -func _HybridFilterDecodeSSE2(fs, in unsafe.Pointer) +func sse2Decode(fs, in unsafe.Pointer) //go:noescape -func _HybridFilterEncodeSSE2(fs, in unsafe.Pointer) +func sse2Encode(fs, in unsafe.Pointer) //go:noescape -func _HybridFilterDecodeCompat(fs, in unsafe.Pointer) +func x64Decode(fs, in unsafe.Pointer) //go:noescape -func _HybridFilterEncodeCompat(fs, in unsafe.Pointer) +func x64Encode(fs, in unsafe.Pointer) diff --git a/filter/filter_amd64.s b/filter/filter_amd64.s index 2fa98e6..f0f4d8f 100644 --- a/filter/filter_amd64.s +++ b/filter/filter_amd64.s @@ -1,5 +1,5 @@ //+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT +// AUTO-GENERATED BY GOASMC -- DO NOT EDIT DATA LCDATA1<>+0x000(SB)/8, $0x0000000200000001 DATA LCDATA1<>+0x008(SB)/8, $0x0000000400000002 @@ -7,7 +7,7 @@ DATA LCDATA1<>+0x010(SB)/8, $0xfffffffeffffffff DATA LCDATA1<>+0x018(SB)/8, $0xfffffffcfffffffe GLOBL LCDATA1<>(SB), 8, $32 -TEXT ·_HybridFilterDecodeSSE4(SB), $0-16 +TEXT ·sse4Decode(SB), $0-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI @@ -86,7 +86,7 @@ DATA LCDATA2<>+0x010(SB)/8, $0xfffffffeffffffff DATA LCDATA2<>+0x018(SB)/8, $0xfffffffcfffffffe GLOBL LCDATA2<>(SB), 8, $32 -TEXT ·_HybridFilterDecodeSSE2(SB), $0-16 +TEXT ·sse4Encode(SB), $0-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI @@ -118,57 +118,46 @@ LBB1_4: LONG $0x677f0f66; BYTE $0x20 // movdqa oword [rdi + 32], xmm4 LBB1_5: - LONG $0xf26f0f66 // movdqa xmm6, xmm2 - LONG $0xf5f40f66 // pmuludq xmm6, xmm5 - LONG $0xfa700f66; BYTE $0xb1 // pshufd xmm7, xmm2, 177 - LONG $0xed700f66; BYTE $0xb1 // pshufd xmm5, xmm5, 177 - LONG $0xeff40f66 // pmuludq xmm5, xmm7 - LONG $0xed700f66; BYTE $0xa0 // pshufd xmm5, xmm5, 160 - LONG $0x0e3a0f66; WORD $0x33ee // pblendw xmm5, xmm6, 51 - LONG $0xf06f0f66 // movdqa xmm6, xmm0 - LONG $0xf4f40f66 // pmuludq xmm6, xmm4 - LONG $0xf8700f66; BYTE $0xb1 // pshufd xmm7, xmm0, 177 - LONG $0xe4700f66; BYTE $0xb1 // pshufd xmm4, xmm4, 177 - LONG $0xe7f40f66 // pmuludq xmm4, xmm7 - LONG $0xe4700f66; BYTE $0xa0 // pshufd xmm4, xmm4, 160 - LONG $0x0e3a0f66; WORD $0x33e6 // pblendw xmm4, xmm6, 51 - LONG $0xe5fe0f66 // paddd xmm4, xmm5 - LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238 - LONG $0xecfe0f66 // paddd xmm5, xmm4 - LONG $0xea7e0f66 // movd edx, xmm5 - LONG $0x163a0f66; WORD $0x01e8 // pextrd eax, xmm5, 1 - WORD $0x0144; BYTE $0xc0 // add eax, r8d - WORD $0xd001 // add eax, edx - LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4 - LONG $0xd86f0f66 // movdqa xmm3, xmm0 - LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4 - LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30 - LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI1_0] */ - LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI1_1] */ - LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3 - LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1 - LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0 - WORD $0x168b // mov edx, dword [rsi] - WORD $0x5789; BYTE $0x04 // mov dword [rdi + 4], edx - WORD $0xf8d3 // sar eax, cl - WORD $0x0601 // add dword [rsi], eax - LONG $0x00a4878b; WORD $0x0000 // mov eax, dword [rdi + 164] - WORD $0xc189 // mov ecx, eax - WORD $0xd9f7 // neg ecx - LONG $0x00a08f89; WORD $0x0000 // mov dword [rdi + 160], ecx - LONG $0x00a88f8b; WORD $0x0000 // mov ecx, dword [rdi + 168] - WORD $0xca89 // mov edx, ecx - WORD $0xdaf7 // neg edx - LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx - WORD $0x168b // mov edx, dword [rsi] - LONG $0x00ac972b; WORD $0x0000 // sub edx, dword [rdi + 172] - LONG $0x00a89789; WORD $0x0000 // mov dword [rdi + 168], edx - WORD $0x368b // mov esi, dword [rsi] - LONG $0x00acb789; WORD $0x0000 // mov dword [rdi + 172], esi - WORD $0xca29 // sub edx, ecx - LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx - WORD $0xc229 // sub edx, eax - LONG $0x00a09789; WORD $0x0000 // mov dword [rdi + 160], edx + LONG $0x40380f66; BYTE $0xea // pmulld xmm5, xmm2 + LONG $0x40380f66; BYTE $0xe0 // pmulld xmm4, xmm0 + LONG $0xe5fe0f66 // paddd xmm4, xmm5 + LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238 + LONG $0xecfe0f66 // paddd xmm5, xmm4 + LONG $0xe87e0f66 // movd eax, xmm5 + LONG $0x3a0f4166; WORD $0xea16; BYTE $0x01 // pextrd r10d, xmm5, 1 + WORD $0x0145; BYTE $0xc2 // add r10d, r8d + WORD $0x0141; BYTE $0xc2 // add r10d, eax + LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4 + LONG $0xd86f0f66 // movdqa xmm3, xmm0 + LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4 + LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30 + LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI1_0] */ + LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI1_1] */ + LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3 + LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1 + LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0 + LONG $0xa4878b44; WORD $0x0000; BYTE $0x00 // mov r8d, dword [rdi + 164] + WORD $0x8944; BYTE $0xc0 // mov eax, r8d + WORD $0xd8f7 // neg eax + LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax + LONG $0xa88f8b44; WORD $0x0000; BYTE $0x00 // mov r9d, dword [rdi + 168] + WORD $0x8944; BYTE $0xc8 // mov eax, r9d + WORD $0xd8f7 // neg eax + LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax + WORD $0x068b // mov eax, dword [rsi] + LONG $0x00ac872b; WORD $0x0000 // sub eax, dword [rdi + 172] + LONG $0x00a88789; WORD $0x0000 // mov dword [rdi + 168], eax + WORD $0x168b // mov edx, dword [rsi] + LONG $0x00ac9789; WORD $0x0000 // mov dword [rdi + 172], edx + WORD $0x2944; BYTE $0xc8 // sub eax, r9d + LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax + WORD $0x2944; BYTE $0xc0 // sub eax, r8d + LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax + WORD $0xd341; BYTE $0xfa // sar r10d, cl + WORD $0x068b // mov eax, dword [rsi] + WORD $0x2944; BYTE $0xd0 // sub eax, r10d + WORD $0x0689 // mov dword [rsi], eax + WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax RET DATA LCDATA3<>+0x000(SB)/8, $0x0000000200000001 @@ -177,7 +166,7 @@ DATA LCDATA3<>+0x010(SB)/8, $0xfffffffeffffffff DATA LCDATA3<>+0x018(SB)/8, $0xfffffffcfffffffe GLOBL LCDATA3<>(SB), 8, $32 -TEXT ·_HybridFilterEncodeSSE4(SB), $0-16 +TEXT ·sse2Decode(SB), $0-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI @@ -209,46 +198,57 @@ LBB2_4: LONG $0x677f0f66; BYTE $0x20 // movdqa oword [rdi + 32], xmm4 LBB2_5: - LONG $0x40380f66; BYTE $0xea // pmulld xmm5, xmm2 - LONG $0x40380f66; BYTE $0xe0 // pmulld xmm4, xmm0 - LONG $0xe5fe0f66 // paddd xmm4, xmm5 - LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238 - LONG $0xecfe0f66 // paddd xmm5, xmm4 - LONG $0xe87e0f66 // movd eax, xmm5 - LONG $0x3a0f4166; WORD $0xea16; BYTE $0x01 // pextrd r10d, xmm5, 1 - WORD $0x0145; BYTE $0xc2 // add r10d, r8d - WORD $0x0141; BYTE $0xc2 // add r10d, eax - LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4 - LONG $0xd86f0f66 // movdqa xmm3, xmm0 - LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4 - LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30 - LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI2_0] */ - LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI2_1] */ - LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3 - LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1 - LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0 - LONG $0xa4878b44; WORD $0x0000; BYTE $0x00 // mov r8d, dword [rdi + 164] - WORD $0x8944; BYTE $0xc0 // mov eax, r8d - WORD $0xd8f7 // neg eax - LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax - LONG $0xa88f8b44; WORD $0x0000; BYTE $0x00 // mov r9d, dword [rdi + 168] - WORD $0x8944; BYTE $0xc8 // mov eax, r9d - WORD $0xd8f7 // neg eax - LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax - WORD $0x068b // mov eax, dword [rsi] - LONG $0x00ac872b; WORD $0x0000 // sub eax, dword [rdi + 172] - LONG $0x00a88789; WORD $0x0000 // mov dword [rdi + 168], eax - WORD $0x168b // mov edx, dword [rsi] - LONG $0x00ac9789; WORD $0x0000 // mov dword [rdi + 172], edx - WORD $0x2944; BYTE $0xc8 // sub eax, r9d - LONG $0x00a48789; WORD $0x0000 // mov dword [rdi + 164], eax - WORD $0x2944; BYTE $0xc0 // sub eax, r8d - LONG $0x00a08789; WORD $0x0000 // mov dword [rdi + 160], eax - WORD $0xd341; BYTE $0xfa // sar r10d, cl - WORD $0x068b // mov eax, dword [rsi] - WORD $0x2944; BYTE $0xd0 // sub eax, r10d - WORD $0x0689 // mov dword [rsi], eax - WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax + LONG $0xf26f0f66 // movdqa xmm6, xmm2 + LONG $0xf5f40f66 // pmuludq xmm6, xmm5 + LONG $0xfa700f66; BYTE $0xb1 // pshufd xmm7, xmm2, 177 + LONG $0xed700f66; BYTE $0xb1 // pshufd xmm5, xmm5, 177 + LONG $0xeff40f66 // pmuludq xmm5, xmm7 + LONG $0xed700f66; BYTE $0xa0 // pshufd xmm5, xmm5, 160 + LONG $0x0e3a0f66; WORD $0x33ee // pblendw xmm5, xmm6, 51 + LONG $0xf06f0f66 // movdqa xmm6, xmm0 + LONG $0xf4f40f66 // pmuludq xmm6, xmm4 + LONG $0xf8700f66; BYTE $0xb1 // pshufd xmm7, xmm0, 177 + LONG $0xe4700f66; BYTE $0xb1 // pshufd xmm4, xmm4, 177 + LONG $0xe7f40f66 // pmuludq xmm4, xmm7 + LONG $0xe4700f66; BYTE $0xa0 // pshufd xmm4, xmm4, 160 + LONG $0x0e3a0f66; WORD $0x33e6 // pblendw xmm4, xmm6, 51 + LONG $0xe5fe0f66 // paddd xmm4, xmm5 + LONG $0xec700f66; BYTE $0xee // pshufd xmm5, xmm4, 238 + LONG $0xecfe0f66 // paddd xmm5, xmm4 + LONG $0xea7e0f66 // movd edx, xmm5 + LONG $0x163a0f66; WORD $0x01e8 // pextrd eax, xmm5, 1 + WORD $0x0144; BYTE $0xc0 // add eax, r8d + WORD $0xd001 // add eax, edx + LONG $0x0f3a0f66; WORD $0x04cb // palignr xmm1, xmm3, 4 + LONG $0xd86f0f66 // movdqa xmm3, xmm0 + LONG $0x0f3a0f66; WORD $0x04da // palignr xmm3, xmm2, 4 + LONG $0xe0720f66; BYTE $0x1e // psrad xmm0, 30 + LONG $0x45eb0f66; BYTE $0x00 // por xmm0, oword 0[rbp] /* [rip + LCPI2_0] */ + LONG $0x45db0f66; BYTE $0x10 // pand xmm0, oword 16[rbp] /* [rip + LCPI2_1] */ + LONG $0x9f7f0f66; LONG $0x00000090 // movdqa oword [rdi + 144], xmm3 + LONG $0x4f7f0f66; BYTE $0x30 // movdqa oword [rdi + 48], xmm1 + LONG $0x477f0f66; BYTE $0x40 // movdqa oword [rdi + 64], xmm0 + WORD $0x168b // mov edx, dword [rsi] + WORD $0x5789; BYTE $0x04 // mov dword [rdi + 4], edx + WORD $0xf8d3 // sar eax, cl + WORD $0x0601 // add dword [rsi], eax + LONG $0x00a4878b; WORD $0x0000 // mov eax, dword [rdi + 164] + WORD $0xc189 // mov ecx, eax + WORD $0xd9f7 // neg ecx + LONG $0x00a08f89; WORD $0x0000 // mov dword [rdi + 160], ecx + LONG $0x00a88f8b; WORD $0x0000 // mov ecx, dword [rdi + 168] + WORD $0xca89 // mov edx, ecx + WORD $0xdaf7 // neg edx + LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx + WORD $0x168b // mov edx, dword [rsi] + LONG $0x00ac972b; WORD $0x0000 // sub edx, dword [rdi + 172] + LONG $0x00a89789; WORD $0x0000 // mov dword [rdi + 168], edx + WORD $0x368b // mov esi, dword [rsi] + LONG $0x00acb789; WORD $0x0000 // mov dword [rdi + 172], esi + WORD $0xca29 // sub edx, ecx + LONG $0x00a49789; WORD $0x0000 // mov dword [rdi + 164], edx + WORD $0xc229 // sub edx, eax + LONG $0x00a09789; WORD $0x0000 // mov dword [rdi + 160], edx RET DATA LCDATA4<>+0x000(SB)/8, $0x0000000200000001 @@ -257,7 +257,7 @@ DATA LCDATA4<>+0x010(SB)/8, $0xfffffffeffffffff DATA LCDATA4<>+0x018(SB)/8, $0xfffffffcfffffffe GLOBL LCDATA4<>(SB), 8, $32 -TEXT ·_HybridFilterEncodeSSE2(SB), $0-16 +TEXT ·sse2Encode(SB), $0-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI @@ -343,7 +343,7 @@ LBB3_5: WORD $0x4789; BYTE $0x04 // mov dword [rdi + 4], eax RET -TEXT ·_HybridFilterDecodeCompat(SB), $32-16 +TEXT ·x64Decode(SB), $32-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI @@ -481,7 +481,7 @@ LBB4_6: SUBQ $8, SP RET -TEXT ·_HybridFilterEncodeCompat(SB), $32-16 +TEXT ·x64Encode(SB), $32-16 MOVQ fs+0(FP), DI MOVQ in+8(FP), SI diff --git a/filter/filter_amd64_test.go b/filter/filter_amd64_test.go index 3181d2f..f85f464 100644 --- a/filter/filter_amd64_test.go +++ b/filter/filter_amd64_test.go @@ -7,62 +7,62 @@ import ( "testing" ) -func BenchmarkEncodeSSE4(b *testing.B) { +func BenchmarkSSE4Encode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - EncodeSSE4(f, &in) + SSE4Encode(f, &in) } }) } -func BenchmarkEncodeSSE2(b *testing.B) { +func BenchmarkSSE2Encode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - EncodeSSE2(f, &in) + SSE2Encode(f, &in) } }) } -func BenchmarkEncodeCompatX64(b *testing.B) { +func BenchmarkX64Encode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - EncodeCompat(f, &in) + X64Encode(f, &in) } }) } -func BenchmarkDecodeSSE4(b *testing.B) { +func BenchmarkSSE4Decode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - DecodeSSE4(f, &in) + SSE4Decode(f, &in) } }) } -func BenchmarkDecodeSSE2(b *testing.B) { +func BenchmarkSSE2Decode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - DecodeSSE2(f, &in) + SSE2Decode(f, &in) } }) } -func BenchmarkDecodeCompatX64(b *testing.B) { +func BenchmarkX64Decode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - DecodeCompat(f, &in) + X64Decode(f, &in) } }) } diff --git a/filter/filter_compat.go b/filter/filter_compat.go deleted file mode 100644 index d767fb6..0000000 --- a/filter/filter_compat.go +++ /dev/null @@ -1,11 +0,0 @@ -//+build !amd64 - -package filter - -func DecodeCompat(f *Filter, in *int32) { - decodeCompat(f, in) -} - -func EncodeCompat(f *Filter, in *int32) { - encodeCompat(f, in) -} diff --git a/filter/filter_compat_test.go b/filter/filter_test.go similarity index 68% rename from filter/filter_compat_test.go rename to filter/filter_test.go index 2d97b75..ecad5a5 100644 --- a/filter/filter_compat_test.go +++ b/filter/filter_test.go @@ -4,22 +4,22 @@ import ( "testing" ) -func BenchmarkEncodeCompat(b *testing.B) { +func BenchmarkCompatEncode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - encodeCompat(f, &in) + CompatEncode(f, &in) } }) } -func BenchmarkDecodeCompat(b *testing.B) { +func BenchmarkCompatDecode(b *testing.B) { f := New([8]byte{1, 2, 3, 4, 5, 6, 7, 8}, 8) var in int32 b.RunParallel(func(pb *testing.PB) { for pb.Next() { - decodeCompat(f, &in) + CompatDecode(f, &in) } }) }