Fixes package filter cont'd

This commit is contained in:
Markus Tzoe 2017-07-22 04:31:52 +08:00
parent 36bbb1747b
commit f8381ea58f
5 changed files with 132 additions and 20 deletions

View file

@ -6,13 +6,13 @@ import (
// init assigns the package-level encode and decode function variables based
// on CPU feature checks: cpuid.CPU.SSE4() is tested first, then
// cpuid.CPU.SSE2(), and the Compat functions are used when neither reports
// true.
//
// NOTE(review): every branch lists two assignment pairs for the same
// variables. This looks like the removed and added lines of a diff shown
// together; as written, the second pair in each branch is the one that stays
// in effect. Confirm against the real file.
func init() {
if cpuid.CPU.SSE4() {
encode = _HybridFilterEncodeSSE4
decode = _HybridFilterDecodeSSE4
encode = EncodeSSE4
decode = DecodeSSE4
} else if cpuid.CPU.SSE2() {
encode = _HybridFilterEncodeSSE2
decode = _HybridFilterDecodeSSE2
encode = EncodeSSE2
decode = DecodeSSE2
} else {
encode = _HybridFilterEncodeCompat
decode = _HybridFilterDecodeCompat
encode = EncodeCompat
decode = DecodeCompat
}
}

View file

@ -1,9 +1,5 @@
package filter
import (
"unsafe"
)
// Filter exposes Decode and Encode methods for data manipulation
type Filter struct {
index int32
@ -15,7 +11,7 @@ type Filter struct {
dl [24]int32
}
// codec is the function type of the package-level decode variable (and, per
// the init function, of encode as well): it receives the filter state and a
// pointer to the value being processed.
//
// NOTE(review): two declarations of codec appear here, one taking
// unsafe.Pointer arguments and one taking (*Filter, *int32). This looks like
// the old and new lines of a diff shown together — confirm which one the
// real file keeps.
type codec func(fs, in unsafe.Pointer)
type codec func(fs *Filter, in *int32)
var (
decode codec
@ -39,9 +35,9 @@ func New(data [8]byte, shift uint32) *Filter {
}
// Decode passes the receiver and in to the implementation currently stored in
// the package-level decode variable. The value pointed to by in is handed over
// by pointer, so the implementation may modify it in place.
//
// NOTE(review): two call lines appear here (unsafe.Pointer form and typed
// form); this looks like the old and new lines of a diff shown together.
func (f *Filter) Decode(in *int32) {
decode(unsafe.Pointer(f), unsafe.Pointer(in))
decode(f, in)
}
// Encode passes the receiver and in to the implementation currently stored in
// the package-level encode variable. The value pointed to by in is handed over
// by pointer, so the implementation may modify it in place.
//
// NOTE(review): two call lines appear here (unsafe.Pointer form and typed
// form); this looks like the old and new lines of a diff shown together.
func (f *Filter) Encode(in *int32) {
encode(unsafe.Pointer(f), unsafe.Pointer(in))
encode(f, in)
}

View file

@ -7,6 +7,17 @@ import (
"unsafe"
)
// DecodeSSE4 converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterDecodeSSE4.
func DecodeSSE4(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterDecodeSSE4(state, value)
}
// EncodeSSE4 converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterEncodeSSE4.
func EncodeSSE4(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterEncodeSSE4(state, value)
}
// DecodeSSE2 converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterDecodeSSE2.
func DecodeSSE2(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterDecodeSSE2(state, value)
}
// EncodeSSE2 converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterEncodeSSE2.
func EncodeSSE2(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterEncodeSSE2(state, value)
}
// DecodeCompat converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterDecodeCompat.
func DecodeCompat(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterDecodeCompat(state, value)
}
// EncodeCompat converts its typed arguments to unsafe.Pointer values and
// forwards them to _HybridFilterEncodeCompat.
func EncodeCompat(f *Filter, in *int32) {
	state, value := unsafe.Pointer(f), unsafe.Pointer(in)
	_HybridFilterEncodeCompat(state, value)
}
// _HybridFilterDecodeSSE4 is declared here without a Go body, so its
// implementation lives outside this file (presumably assembly — TODO
// confirm). The DecodeSSE4 wrapper calls it with a *Filter and an *int32,
// each converted to unsafe.Pointer.
//
//go:noescape
func _HybridFilterDecodeSSE4(fs, in unsafe.Pointer)

106
filter/filter_compat.go Normal file
View file

@ -0,0 +1,106 @@
//+build !amd64
package filter
// DecodeCompat performs one decode step on *in using plain Go arithmetic.
//
// It works on the first eight entries of f.dl, f.qm and f.dx:
//   - f.qm is adjusted by f.dx, subtracting when f.error is negative and
//     adding when it is positive (no change when it is zero);
//   - the dot product of f.dl and f.qm is added to f.round, shifted right by
//     f.shift, and the result is added to *in;
//   - f.error is set to the value *in held before that addition;
//   - f.dx and f.dl are then advanced using the updated *in.
func DecodeCompat(f *Filter, in *int32) {
	const width = 8

	dl := f.dl[:]
	qm := f.qm[:]
	dx := f.dx[:]

	// Move qm by dx in the direction given by the sign of the stored error.
	switch {
	case f.error < 0:
		for i := 0; i < width; i++ {
			qm[i] -= dx[i]
		}
	case f.error > 0:
		for i := 0; i < width; i++ {
			qm[i] += dx[i]
		}
	}

	// Rounded dot product of the first eight dl and qm entries.
	acc := f.round
	for i := 0; i < width; i++ {
		acc += dl[i] * qm[i]
	}

	// Slide entries 1..4 down to 0..3, dx first and then dl.
	copy(dx[:4], dx[1:5])
	copy(dl[:4], dl[1:5])

	// Entries 4..7 of dx are rebuilt from the corresponding dl entries:
	// the arithmetic shift by 30 keeps only the top bits, which are then
	// combined with a fixed bit pattern per position.
	dx[4] = (dl[4] >> 30) | 1
	dx[5] = ((dl[5] >> 30) | 2) &^ 1
	dx[6] = ((dl[6] >> 30) | 2) &^ 1
	dx[7] = ((dl[7] >> 30) | 4) &^ 3

	// Remember the incoming value, then apply the shifted sum to it.
	f.error = *in
	*in += acc >> uint32(f.shift)

	// Refresh dl[4..7] from the updated value; statement order matters
	// because later lines read entries written by earlier ones.
	dl[4] = -dl[5]
	dl[5] = -dl[6]
	dl[6] = *in - dl[7]
	dl[7] = *in
	dl[5] += dl[6]
	dl[4] += dl[5]
}
// EncodeCompat performs one encode step on *in using plain Go arithmetic.
//
// It works on the first eight entries of f.dl, f.qm and f.dx:
//   - f.qm is adjusted by f.dx, subtracting when f.error is negative and
//     adding when it is positive (no change when it is zero);
//   - the dot product of f.dl and f.qm is added to f.round;
//   - f.dx and f.dl are advanced using the incoming (unmodified) *in;
//   - the sum, shifted right by f.shift, is subtracted from *in, and f.error
//     is set to the resulting value.
func EncodeCompat(f *Filter, in *int32) {
	const width = 8

	dl := f.dl[:]
	qm := f.qm[:]
	dx := f.dx[:]

	// Move qm by dx in the direction given by the sign of the stored error.
	switch {
	case f.error < 0:
		for i := 0; i < width; i++ {
			qm[i] -= dx[i]
		}
	case f.error > 0:
		for i := 0; i < width; i++ {
			qm[i] += dx[i]
		}
	}

	// Rounded dot product of the first eight dl and qm entries.
	acc := f.round
	for i := 0; i < width; i++ {
		acc += dl[i] * qm[i]
	}

	// Slide entries 1..4 down to 0..3, dx first and then dl.
	copy(dx[:4], dx[1:5])
	copy(dl[:4], dl[1:5])

	// Entries 4..7 of dx are rebuilt from the corresponding dl entries:
	// the arithmetic shift by 30 keeps only the top bits, which are then
	// combined with a fixed bit pattern per position.
	dx[4] = (dl[4] >> 30) | 1
	dx[5] = ((dl[5] >> 30) | 2) &^ 1
	dx[6] = ((dl[6] >> 30) | 2) &^ 1
	dx[7] = ((dl[7] >> 30) | 4) &^ 3

	// Refresh dl[4..7] from the incoming value; statement order matters
	// because later lines read entries written by earlier ones.
	dl[4] = -dl[5]
	dl[5] = -dl[6]
	dl[6] = *in - dl[7]
	dl[7] = *in
	dl[5] += dl[6]
	dl[4] += dl[5]

	// Remove the shifted sum from the value and remember the result.
	*in -= acc >> uint32(f.shift)
	f.error = *in
}

View file

@ -2,7 +2,6 @@ package filter
import (
"testing"
"unsafe"
)
func BenchmarkEncodeSSE4(b *testing.B) {
@ -10,7 +9,7 @@ func BenchmarkEncodeSSE4(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterEncodeSSE4(unsafe.Pointer(f), unsafe.Pointer(&in))
EncodeSSE4(f, &in)
}
})
}
@ -20,7 +19,7 @@ func BenchmarkEncodeSSE2(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterEncodeSSE2(unsafe.Pointer(f), unsafe.Pointer(&in))
EncodeSSE2(f, &in)
}
})
}
@ -30,7 +29,7 @@ func BenchmarkEncodeCompat(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterEncodeCompat(unsafe.Pointer(f), unsafe.Pointer(&in))
EncodeCompat(f, &in)
}
})
}
@ -40,7 +39,7 @@ func BenchmarkDecodeSSE4(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterDecodeSSE4(unsafe.Pointer(f), unsafe.Pointer(&in))
DecodeSSE4(f, &in)
}
})
}
@ -50,7 +49,7 @@ func BenchmarkDecodeSSE2(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterDecodeSSE2(unsafe.Pointer(f), unsafe.Pointer(&in))
DecodeSSE2(f, &in)
}
})
}
@ -60,7 +59,7 @@ func BenchmarkDecodeCompat(b *testing.B) {
var in int32
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_HybridFilterDecodeCompat(unsafe.Pointer(f), unsafe.Pointer(&in))
DecodeCompat(f, &in)
}
})
}