Skip to content

Commit 258fcfc

Browse files
committed
refactor
1 parent 12cd26d commit 258fcfc

17 files changed

+4803
-18
lines changed

Diff for: .README_images/03e5b34a.png

68.1 KB
Loading

Diff for: .README_images/21beabb6.png

61.8 KB
Loading

Diff for: .README_images/25ad07ef.png

73.1 KB
Loading

Diff for: .README_images/55a2e7d1.png

66.8 KB
Loading

Diff for: .README_images/64320512.png

66.7 KB
Loading

Diff for: .README_images/e598a60d.png

62.6 KB
Loading

Diff for: README.assets/55a2e7d1.png

66.8 KB
Loading

Diff for: README.assets/data_compress_4-8b1ba456.png

103 KB
Loading

Diff for: README.assets/e598a60d.png

62.6 KB
Loading

Diff for: README.md

+29-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,34 @@
11
# simple8b的Go实现
22

3-
TODO 2022-11-28 02:52:45 测试
3+
4+
5+
Simple8b 是 64 位算法,维护了一个查找表,实现将多个整型数据压缩到一个 64bit 长整型中。其中前 4 位表示选择器,用来标记每个值使用多少bit,后面 60 位用于存储数据。 ![simple8b算法查找表](./README.assets/data_compress_4-8b1ba456.png) 如上图所示,Integers Coded表示可压缩的数据集大小,Bits Per Integer表示每个整数分配多少 Bits 来表示,比如要压缩8个数据,选择器选择8,每个数据用7个 bits 表示,但是如果某个数据的值超过了7个 bits 的表示范围,那么就需要尝试用选择器9,只能压缩前7个数据,每个数据用8个bits来表示,以此类推。第一次未压缩的数据将压缩到一个新的64bit的长整型中,由此可见simple8b算法对小整数的压缩效果比较好,对大整数的压缩效果不佳。
6+
7+
8+
9+
一个int64类型有64位:
10+
11+
![](.README_images/21beabb6.png)
12+
13+
每8个bit分为一组用同一个颜色标识,这样看得更清楚一些,每个不同的颜色是一个byte,总共是8个byte 64个bit:
14+
15+
![](./README.assets/55a2e7d1.png)
16+
17+
其中前4个bit用来存放数值位数长度:
18+
19+
![](./README.assets/e598a60d.png)
20+
21+
比如每个数值使用4个bit来表示:
22+
23+
![](.README_images/25ad07ef.png)
24+
25+
每个数值使用10个bit来表示:
26+
27+
![](.README_images/03e5b34a.png)
28+
29+
每个数值使用15个bit来表示:
30+
31+
![](.README_images/64320512.png)
432

533

634

Diff for: docs/.$simple8b.drawio.bkp

+2,330
Large diffs are not rendered by default.

Diff for: docs/simple8b.drawio

+2,330
Large diffs are not rendered by default.

Diff for: mode.go

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package simple8b
2+
3+
import (
4+
"github.com/golang-infrastructure/go-gtypes"
5+
)
6+
7+
// Mode identifies one of the packing modes declared below.
type Mode int

// The eight simple8b modes, ordered from the narrowest value width to the widest.
const (
	// Mode0BitPacking1 packs 1-bit integers, one bit per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 1.
	Mode0BitPacking1 = Mode(0)

	// Mode1BitPacking4 packs 4-bit integers, four bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 15.
	Mode1BitPacking4 = Mode(1)

	// Mode2BitPacking8 packs 8-bit integers, eight bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 255.
	Mode2BitPacking8 = Mode(2)

	// Mode3BitPacking12 packs 12-bit integers, twelve bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 4095.
	Mode3BitPacking12 = Mode(3)

	// Mode4BitPacking16 packs 16-bit integers, sixteen bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 65535.
	Mode4BitPacking16 = Mode(4)

	// Mode5BitPacking20 packs 20-bit integers, twenty bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 1048575.
	Mode5BitPacking20 = Mode(5)

	// Mode6BitPacking24 packs 24-bit integers, twenty-four bits per integer.
	// Suited to sparse arrays whose integers lie in the range 0 to 16777215.
	Mode6BitPacking24 = Mode(6)

	// Mode7DirectMode stores integers directly, with no additional compression.
	// Suited to dense integer arrays, or to ones the seven modes above cannot
	// compress effectively.
	Mode7DirectMode = Mode(7)
)
36+
37+
// WhichMode 判断给定的无符号整数适合用哪种压缩模式
38+
func WhichMode[T gtypes.Unsigned](value T) Mode {
39+
if value <= 1 {
40+
return Mode0BitPacking1
41+
} else if value <= 15 {
42+
return Mode1BitPacking4
43+
} else if value <= 255 {
44+
return Mode2BitPacking8
45+
} else if value <= 4095 {
46+
return Mode3BitPacking12
47+
} else if value <= 65535 {
48+
return Mode4BitPacking16
49+
} else if value <= 1048575 {
50+
return Mode5BitPacking20
51+
} else if value <= 16777215 {
52+
return Mode6BitPacking24
53+
} else {
54+
return Mode7DirectMode
55+
}
56+
}
57+
58+
// WhichModeForSlice 判断给定的无符号切片应该使用哪种压缩模式
59+
func WhichModeForSlice[T gtypes.Unsigned](values []T) Mode {
60+
mode := Mode0BitPacking1
61+
for _, value := range values {
62+
m := WhichMode(value)
63+
// 要遵从切片中最大的那个数字的模式,要能够盛得下它
64+
if m > mode {
65+
mode = m
66+
}
67+
}
68+
return mode
69+
}

Diff for: simple16.go

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package simple8b
2+
3+
// 对simple8的扩展
4+
5+

Diff for: simple8b.go

+5-17
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ func Encode[T gtypes.Integer](slice []T) []byte {
3131

3232
// 然后就按照这个来存储了
3333
for _, value := range slice {
34+
// 结合zigzag算法,可以用来压缩负数
3435
result = append(result, IntToBytes(zigzag.ToZigZag(value), blockSize)...)
3536
}
3637

@@ -61,23 +62,10 @@ func DecodeE[T gtypes.Integer](bytes []byte) ([]T, error) {
6162
return result, nil
6263
}
6364

64-
// IntToBytes 把给定的整数的低n位转换为字节数组
65-
func IntToBytes[T gtypes.Integer](value T, blockSize int) []byte {
66-
result := make([]byte, blockSize)
67-
for index := range result {
68-
byteValue := (uint64(0xFF) << index) & uint64(value)
69-
result[index] = uint8(byteValue)
70-
}
71-
return result
65+
func EncodeToBytes[T gtypes.Integer](slice []T) []byte {
66+
7267
}
7368

74-
// BytesToInt 把字节转为整数
75-
func BytesToInt[T gtypes.Integer](bytes []byte) T {
76-
var r uint64
77-
weight := 0
78-
for _, x := range bytes {
79-
r = r | (uint64(x) << weight)
80-
weight += 8
81-
}
82-
return T(r)
69+
func DecodeFromBytes[T gtypes.Integer](bytes []byte) ([]T, error) {
70+
8371
}

Diff for: simple8b_with_mode.go

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package simple8b
2+
3+
// EncodeByMode is a placeholder: it takes no arguments, returns nothing and
// currently has an empty body.
func EncodeByMode() {}
6+
7+
// DecodeByMode is a placeholder: it takes no arguments, returns nothing and
// currently has an empty body.
func DecodeByMode() {}
10+
11+

Diff for: type_convert.go

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package simple8b
2+
3+
import "github.com/golang-infrastructure/go-gtypes"
4+
5+
// IntToBytes 把给定的整数的低n位转换为字节数组
6+
func IntToBytes[T gtypes.Integer](value T, blockSize int) []byte {
7+
result := make([]byte, blockSize)
8+
for index := range result {
9+
byteValue := (uint64(0xFF) << index) & uint64(value)
10+
result[index] = uint8(byteValue)
11+
}
12+
return result
13+
}
14+
15+
// BytesToInt 把字节转为整数
16+
func BytesToInt[T gtypes.Integer](bytes []byte) T {
17+
var r uint64
18+
weight := 0
19+
for _, x := range bytes {
20+
r = r | (uint64(x) << weight)
21+
weight += 8
22+
}
23+
return T(r)
24+
}

0 commit comments

Comments
 (0)