Skip to content

Commit 116ee47

Browse files
authored
Merge pull request #98 from sekiyama58/main
feat: support CDict & DDict for faster operation with repeated dictionary use
2 parents 08fdaa9 + 2c0e8df commit 116ee47

File tree

7 files changed

+249
-13
lines changed

7 files changed

+249
-13
lines changed

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,22 @@ compressed_data = Zstd.compress(data, level: complession_level) # default compre
4949
compressed_using_dict = Zstd.compress("", dict: File.read('dictionary_file'))
5050
```
5151

52+
#### Compression with CDict
53+
54+
If you use the same dictionary repeatedly, you can speed up the setup by creating CDict in advance:
55+
56+
```ruby
57+
cdict = Zstd::CDict.new(File.read('dictionary_file'))
58+
compressed_using_dict = Zstd.compress("", dict: cdict)
59+
```
60+
61+
The compression level can be specified when creating the CDict.
62+
63+
```ruby
64+
cdict = Zstd::CDict.new(File.read('dictionary_file'), 5)
65+
compressed_using_dict = Zstd.compress("", dict: cdict)
66+
```
67+
5268
#### Streaming Compression
5369
```ruby
5470
stream = Zstd::StreamingCompress.new
@@ -86,6 +102,16 @@ stream << "ghi"
86102
res << stream.finish
87103
```
88104

105+
#### Streaming Compression with CDict of level 5
106+
```ruby
107+
cdict = Zstd::CDict.new(File.read('dictionary_file'), 5)
108+
stream = Zstd::StreamingCompress.new(dict: cdict)
109+
stream << "abc" << "def"
110+
res = stream.flush
111+
stream << "ghi"
112+
res << stream.finish
113+
```
114+
89115
### Decompression
90116

91117
#### Simple Decompression
@@ -100,6 +126,15 @@ data = Zstd.decompress(compressed_data)
100126
Zstd.decompress(compressed_using_dict, dict: File.read('dictionary_file'))
101127
```
102128

129+
#### Decompression with DDict
130+
131+
If you use the same dictionary repeatedly, you can speed up the setup by creating DDict in advance:
132+
133+
```ruby
134+
ddict = Zstd::DDict.new(File.read('dictionary_file'))
135+
data = Zstd.decompress(compressed_using_dict, dict: ddict)
136+
```
137+
103138
#### Streaming Decompression
104139
```ruby
105140
cstr = "" # Compressed data
@@ -118,6 +153,8 @@ result << stream.decompress(cstr[0, 10])
118153
result << stream.decompress(cstr[10..-1])
119154
```
120155

156+
A DDict can also be passed as `dict:`.
157+
121158
### Skippable frame
122159

123160
```ruby

ext/zstdruby/common.h

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include <stdbool.h>
99
#include "./libzstd/zstd.h"
1010

11+
extern VALUE rb_cCDict, rb_cDDict;
12+
1113
static int convert_compression_level(VALUE compression_level_value)
1214
{
1315
if (NIL_P(compression_level_value)) {
@@ -34,12 +36,24 @@ static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VAL
3436
ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level);
3537

3638
if (kwargs_values[1] != Qundef && kwargs_values[1] != Qnil) {
37-
char* dict_buffer = RSTRING_PTR(kwargs_values[1]);
38-
size_t dict_size = RSTRING_LEN(kwargs_values[1]);
39-
size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size);
40-
if (ZSTD_isError(load_dict_ret)) {
39+
if (CLASS_OF(kwargs_values[1]) == rb_cCDict) {
40+
ZSTD_CDict* cdict = DATA_PTR(kwargs_values[1]);
41+
size_t ref_dict_ret = ZSTD_CCtx_refCDict(ctx, cdict);
42+
if (ZSTD_isError(ref_dict_ret)) {
43+
ZSTD_freeCCtx(ctx);
44+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_refCDict failed");
45+
}
46+
} else if (TYPE(kwargs_values[1]) == T_STRING) {
47+
char* dict_buffer = RSTRING_PTR(kwargs_values[1]);
48+
size_t dict_size = RSTRING_LEN(kwargs_values[1]);
49+
size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size);
50+
if (ZSTD_isError(load_dict_ret)) {
51+
ZSTD_freeCCtx(ctx);
52+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
53+
}
54+
} else {
4155
ZSTD_freeCCtx(ctx);
42-
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
56+
rb_raise(rb_eArgError, "`dict:` must be a Zstd::CDict or a String");
4357
}
4458
}
4559
}
@@ -113,12 +127,24 @@ static void set_decompress_params(ZSTD_DCtx* const dctx, VALUE kwargs)
113127
rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values);
114128

115129
if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) {
116-
char* dict_buffer = RSTRING_PTR(kwargs_values[0]);
117-
size_t dict_size = RSTRING_LEN(kwargs_values[0]);
118-
size_t load_dict_ret = ZSTD_DCtx_loadDictionary(dctx, dict_buffer, dict_size);
119-
if (ZSTD_isError(load_dict_ret)) {
130+
if (CLASS_OF(kwargs_values[0]) == rb_cDDict) {
131+
ZSTD_DDict* ddict = DATA_PTR(kwargs_values[0]);
132+
size_t ref_dict_ret = ZSTD_DCtx_refDDict(dctx, ddict);
133+
if (ZSTD_isError(ref_dict_ret)) {
134+
ZSTD_freeDCtx(dctx);
135+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_DCtx_refDDict failed");
136+
}
137+
} else if (TYPE(kwargs_values[0]) == T_STRING) {
138+
char* dict_buffer = RSTRING_PTR(kwargs_values[0]);
139+
size_t dict_size = RSTRING_LEN(kwargs_values[0]);
140+
size_t load_dict_ret = ZSTD_DCtx_loadDictionary(dctx, dict_buffer, dict_size);
141+
if (ZSTD_isError(load_dict_ret)) {
142+
ZSTD_freeDCtx(dctx);
143+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_DCtx_loadDictionary failed");
144+
}
145+
} else {
120146
ZSTD_freeDCtx(dctx);
121-
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
147+
rb_raise(rb_eArgError, "`dict:` must be a Zstd::DDict or a String");
122148
}
123149
}
124150
}

ext/zstdruby/main.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "common.h"
22

33
VALUE rb_mZstd;
4+
VALUE rb_cCDict;
5+
VALUE rb_cDDict;
46
void zstd_ruby_init(void);
57
void zstd_ruby_skippable_frame_init(void);
68
void zstd_ruby_streaming_compress_init(void);
@@ -14,6 +16,8 @@ Init_zstdruby(void)
1416
#endif
1517

1618
rb_mZstd = rb_define_module("Zstd");
19+
rb_cCDict = rb_define_class_under(rb_mZstd, "CDict", rb_cObject);
20+
rb_cDDict = rb_define_class_under(rb_mZstd, "DDict", rb_cObject);
1721
zstd_ruby_init();
1822
zstd_ruby_skippable_frame_init();
1923
zstd_ruby_streaming_compress_init();

ext/zstdruby/zstdruby.c

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,90 @@ static VALUE rb_decompress_using_dict(int argc, VALUE *argv, VALUE self)
195195
return output;
196196
}
197197

198+
static void free_cdict(void *dict)
199+
{
200+
ZSTD_freeCDict(dict);
201+
}
202+
203+
static size_t sizeof_cdict(const void *dict)
204+
{
205+
return ZSTD_sizeof_CDict(dict);
206+
}
207+
208+
static void free_ddict(void *dict)
209+
{
210+
ZSTD_freeDDict(dict);
211+
}
212+
213+
static size_t sizeof_ddict(const void *dict)
214+
{
215+
return ZSTD_sizeof_DDict(dict);
216+
}
217+
218+
static const rb_data_type_t cdict_type = {
219+
"Zstd::CDict",
220+
{0, free_cdict, sizeof_cdict,},
221+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
222+
};
223+
224+
static const rb_data_type_t ddict_type = {
225+
"Zstd::DDict",
226+
{0, free_ddict, sizeof_ddict,},
227+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
228+
};
229+
230+
static VALUE rb_cdict_alloc(VALUE self)
231+
{
232+
ZSTD_CDict* cdict = NULL;
233+
return TypedData_Wrap_Struct(self, &cdict_type, cdict);
234+
}
235+
236+
static VALUE rb_cdict_initialize(int argc, VALUE *argv, VALUE self)
237+
{
238+
VALUE dict;
239+
VALUE compression_level_value;
240+
rb_scan_args(argc, argv, "11", &dict, &compression_level_value);
241+
int compression_level = convert_compression_level(compression_level_value);
242+
243+
StringValue(dict);
244+
char* dict_buffer = RSTRING_PTR(dict);
245+
size_t dict_size = RSTRING_LEN(dict);
246+
247+
ZSTD_CDict* const cdict = ZSTD_createCDict(dict_buffer, dict_size, compression_level);
248+
if (cdict == NULL) {
249+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCDict failed");
250+
}
251+
252+
DATA_PTR(self) = cdict;
253+
return self;
254+
}
255+
256+
static VALUE rb_ddict_alloc(VALUE self)
257+
{
258+
ZSTD_DDict* ddict = NULL;
259+
return TypedData_Wrap_Struct(self, &ddict_type, ddict);
260+
}
261+
262+
static VALUE rb_ddict_initialize(VALUE self, VALUE dict)
263+
{
264+
StringValue(dict);
265+
char* dict_buffer = RSTRING_PTR(dict);
266+
size_t dict_size = RSTRING_LEN(dict);
267+
268+
ZSTD_DDict* const ddict = ZSTD_createDDict(dict_buffer, dict_size);
269+
if (ddict == NULL) {
270+
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDDict failed");
271+
}
272+
273+
DATA_PTR(self) = ddict;
274+
return self;
275+
}
276+
277+
static VALUE rb_prohibit_copy(VALUE self, VALUE obj)
278+
{
279+
rb_raise(rb_eRuntimeError, "CDict cannot be duplicated");
280+
}
281+
198282
void
199283
zstd_ruby_init(void)
200284
{
@@ -203,4 +287,12 @@ zstd_ruby_init(void)
203287
rb_define_module_function(rb_mZstd, "compress_using_dict", rb_compress_using_dict, -1);
204288
rb_define_module_function(rb_mZstd, "decompress", rb_decompress, -1);
205289
rb_define_module_function(rb_mZstd, "decompress_using_dict", rb_decompress_using_dict, -1);
290+
291+
rb_define_alloc_func(rb_cCDict, rb_cdict_alloc);
292+
rb_define_private_method(rb_cCDict, "initialize", rb_cdict_initialize, -1);
293+
rb_define_method(rb_cCDict, "initialize_copy", rb_prohibit_copy, 1);
294+
295+
rb_define_alloc_func(rb_cDDict, rb_ddict_alloc);
296+
rb_define_private_method(rb_cDDict, "initialize", rb_ddict_initialize, 1);
297+
rb_define_method(rb_cDDict, "initialize_copy", rb_prohibit_copy, 1);
206298
}

spec/zstd-ruby-streaming-compress_spec.rb

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
end
5454
end
5555

56-
describe 'dictionary' do
56+
describe 'String dictionary' do
5757
let(:dictionary) do
5858
File.read("#{__dir__}/dictionary")
5959
end
@@ -72,6 +72,25 @@
7272
end
7373
end
7474

75+
describe 'Zstd::CDict dictionary' do
76+
let(:cdict) do
77+
Zstd::CDict.new(File.read("#{__dir__}/dictionary"), 5)
78+
end
79+
let(:user_json) do
80+
File.read("#{__dir__}/user_springmt.json")
81+
end
82+
it 'should work' do
83+
dict_stream = Zstd::StreamingCompress.new(dict: cdict)
84+
dict_stream << user_json
85+
dict_res = dict_stream.finish
86+
stream = Zstd::StreamingCompress.new(level: 5)
87+
stream << user_json
88+
res = stream.finish
89+
90+
expect(dict_res.length).to be < res.length
91+
end
92+
end
93+
7594
describe 'nil dictionary' do
7695
let(:user_json) do
7796
File.read("#{__dir__}/user_springmt.json")

spec/zstd-ruby-streaming-decompress_spec.rb

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
end
3333
end
3434

35-
describe 'dictionary streaming decompress + GC.compact' do
35+
describe 'String dictionary streaming decompress + GC.compact' do
3636
let(:dictionary) do
3737
File.read("#{__dir__}/dictionary")
3838
end
@@ -51,6 +51,28 @@
5151
end
5252
end
5353

54+
describe 'Zstd::DDict dictionary streaming decompress + GC.compact' do
55+
let(:dictionary) do
56+
File.read("#{__dir__}/dictionary")
57+
end
58+
let(:ddict) do
59+
Zstd::DDict.new(dictionary)
60+
end
61+
let(:user_json) do
62+
File.read("#{__dir__}/user_springmt.json")
63+
end
64+
it 'should work' do
65+
compressed_json = Zstd.compress(user_json, dict: dictionary)
66+
stream = Zstd::StreamingDecompress.new(dict: ddict)
67+
result = ''
68+
result << stream.decompress(compressed_json[0, 5])
69+
result << stream.decompress(compressed_json[5, 5])
70+
GC.compact
71+
result << stream.decompress(compressed_json[10..-1])
72+
expect(result).to eq(user_json)
73+
end
74+
end
75+
5476
describe 'nil dictionary streaming decompress + GC.compact' do
5577
let(:dictionary) do
5678
File.read("#{__dir__}/dictionary")

spec/zstd-ruby-using-dict_spec.rb

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# https://github.com/facebook/zstd/releases/tag/v1.1.3
88

99
RSpec.describe Zstd do
10-
describe 'compress and decompress with dict keyward args' do
10+
describe 'compress and decompress with String dict keyword args' do
1111
let(:user_json) do
1212
File.read("#{__dir__}/user_springmt.json")
1313
end
@@ -52,6 +52,42 @@
5252
end
5353
end
5454

55+
describe 'compress and decompress with Zstd::CDict and Zstd::DDict dict keyword args' do
56+
let(:user_json) do
57+
File.read("#{__dir__}/user_springmt.json")
58+
end
59+
let(:cdict) do
60+
Zstd::CDict.new(File.read("#{__dir__}/dictionary"))
61+
end
62+
let(:cdict_10) do
63+
Zstd::CDict.new(File.read("#{__dir__}/dictionary"), 10)
64+
end
65+
let(:ddict) do
66+
Zstd::DDict.new(File.read("#{__dir__}/dictionary"))
67+
end
68+
69+
it 'should work' do
70+
compressed_using_dict = Zstd.compress(user_json, dict: cdict)
71+
compressed = Zstd.compress(user_json)
72+
expect(compressed_using_dict.length).to be < compressed.length
73+
expect(user_json).to eq(Zstd.decompress(compressed_using_dict, dict: ddict))
74+
end
75+
76+
it 'should be able to use dictionary multiple times' do
77+
compressed_using_dict = Zstd.compress(user_json, dict: cdict)
78+
expect(compressed_using_dict).to eq(Zstd.compress(user_json, dict: cdict))
79+
expect(user_json).to eq(Zstd.decompress(compressed_using_dict, dict: ddict))
80+
expect(user_json).to eq(Zstd.decompress(compressed_using_dict, dict: ddict))
81+
end
82+
83+
it 'should support compression levels' do
84+
compressed_using_dict = Zstd.compress(user_json, dict: cdict)
85+
compressed_using_dict_10 = Zstd.compress(user_json, dict: cdict_10)
86+
expect(compressed_using_dict_10.length).to be < compressed_using_dict.length
87+
expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10, dict: ddict))
88+
end
89+
end
90+
5591
describe 'compress_using_dict' do
5692
let(:user_json) do
5793
File.read("#{__dir__}/user_springmt.json")

0 commit comments

Comments
 (0)