13
13
# limitations under the License.
14
14
15
15
from .HashGroupBuilderPair import HashGroupBuilderPair
16
+ from .ManifestSizeCalculator import ManifestSizeCalculator
16
17
from .OptimizerResult import OptimizerResult
17
18
from .TreeOptimizer import TreeOptimizer
18
19
from ..HashGroupBuilder import HashGroupBuilder
@@ -33,7 +34,6 @@ class TreeParameters:
33
34
34
35
Usually, one calls `create_optimized_tree` to calculate these parameters.
35
36
"""
36
- __MAX_MANIFEST_ID = 0xFFFFFF
37
37
38
38
@classmethod
39
39
def create_optimized_tree (cls ,
@@ -49,12 +49,13 @@ def create_optimized_tree(cls,
49
49
max_packet_size = manifest_factory .tree_options ().max_packet_size
50
50
max_tree_degree = manifest_factory .tree_options ().max_tree_degree
51
51
52
- num_pointers_per_node = cls . _calculate_max_pointers (max_packet_size = max_packet_size ,
52
+ num_pointers_per_node = ManifestSizeCalculator (max_packet_size = max_packet_size ,
53
53
manifest_factory = manifest_factory ,
54
54
name_ctx = name_ctx ,
55
- total_bytes = file_metadata .total_bytes )
55
+ total_bytes = file_metadata .total_bytes ). calculate_max_pointers ()
56
56
57
57
if num_pointers_per_node < 2 :
58
+ # TODO: This is not entirely true. If the app data is only 1 chunk, it could work.
58
59
raise ValueError ("With a max_packet_size of %r there is only %r pointers per node, must have at least 2" %
59
60
(max_packet_size , num_pointers_per_node ))
60
61
@@ -125,94 +126,12 @@ def tree_height(self) -> int:
125
126
"""
126
127
return self ._solution .tree_height ()
127
128
128
- @classmethod
129
- def _build_single_hash_group (cls , manifest_factory , indirect_ptrs , direct_ptrs , nc_id , total_bytes ):
130
- # We use total bytes for the subtree size and leaf size. This might end up reserving one more byte
131
- # than necessary if it overflows.
132
- hgb1 = HashGroupBuilder ()
133
- for ptr in indirect_ptrs :
134
- hgb1 .append_indirect (ptr , subtree_size = total_bytes )
135
- for ptr in direct_ptrs :
136
- hgb1 .append_direct (ptr , leaf_size = total_bytes )
137
-
138
- hg1 = hgb1 .hash_group (nc_id = nc_id ,
139
- include_leaf_size = manifest_factory .tree_options ().add_group_leaf_size ,
140
- include_subtree_size = manifest_factory .tree_options ().add_group_subtree_size )
141
- return hg1
142
-
143
- @classmethod
144
- def _build_manifest_packet (cls , manifest_factory , num_hashes , hv , name_ctx , total_bytes ):
145
- # Arbitrary choice, we put n-1 into direct and 1 into indirect
146
- hgb = HashGroupBuilderPair (name_ctx = name_ctx , max_direct = num_hashes - 1 , max_indirect = 1 )
147
-
148
- for hv in (num_hashes - 1 ) * [hv ]:
149
- hgb .prepend_direct (hv )
150
- hgb .prepend_indirect (hv )
151
- if name_ctx .manifest_schema_impl .uses_name_id ():
152
- indirect_start_segment_id = StartSegmentId (cls .__MAX_MANIFEST_ID )
153
- else :
154
- indirect_start_segment_id = None
155
-
156
- if name_ctx .data_schema_impl .uses_name_id ():
157
- direct_start_segment_id = StartSegmentId (SchemaImpl ._MAX_CHUNK_ID )
158
- else :
159
- direct_start_segment_id = None
160
-
161
- # include_leaf_size and include_subtree_size might reserve too much space if we do not use those.
162
- hash_groups = hgb .hash_groups (include_leaf_size = True ,
163
- include_subtree_size = True ,
164
- indirect_start_segment_id = indirect_start_segment_id ,
165
- direct_start_segment_id = direct_start_segment_id )
166
-
167
- packet = manifest_factory .build_packet (source = hash_groups ,
168
- node_subtree_size = total_bytes )
169
-
170
- return packet
171
-
172
- @classmethod
173
- def _calculate_max_pointers (cls , max_packet_size : int , manifest_factory : ManifestFactory ,
174
- name_ctx : NameConstructorContext , total_bytes : int ):
175
- """
176
- Create a Manifest with the specified number of tree pointers and figure out how much space we have left
177
- out of self._max_size. Then figure out how many data pointers we can fit in.
178
-
179
- We only put metadata and locators and things like that in the root manifest.
180
-
181
- :param max_packet_size: The maximum ccnpy.Packet size (bytes)
182
- :param manifest_factory: Factory used to create manifests
183
- :param total_bytes: The total file bytes. We need to reserve big enough ints for leaf_size and subtree_size
184
- :return: The number of data points we can fit in a max_size nameless manifest
185
- """
186
- # Assume 32-byte sha256 hashes
187
- hv = HashValue .create_sha256 (32 * [0 ])
188
- hash_value_len = len (hv )
189
- packet = cls ._build_manifest_packet (manifest_factory , 1 , hv , name_ctx , total_bytes )
190
- length = len (packet )
191
- if length >= max_packet_size :
192
- raise ValueError ("An empty manifest packet is %r bytes and exceeds max_size %r" % (length , max_packet_size ))
193
-
194
- slack = max_packet_size - length
195
- # +1 because we already have 1 hash in the manifest
196
- num_hashes = int (slack / hash_value_len ) + 1
197
-
198
- # Now validate that it works
199
- packet = cls ._build_manifest_packet (manifest_factory , num_hashes , hv , name_ctx , total_bytes )
200
- length = len (packet )
201
- if length > max_packet_size :
202
- raise ValueError (
203
- "A filled manifest packet is %r bytes with %r hashes, a hash is %r bytes, and exceeds max_size %r" %
204
- (length , num_hashes , hash_value_len , max_packet_size ))
205
-
206
- #print("calculate_max_pointers = %r in length %r, actual length %r" % (num_hashes, max_packet_size, length))
207
-
208
- if num_hashes < 2 :
209
- min_packet_size = len (packet ) + hash_value_len
210
- raise ValueError ("With max_packet_size %r there are %r hashes/manifest, must have at least 2."
211
- " Minimum packet_size is %r" % (max_packet_size , num_hashes , min_packet_size ))
212
- return num_hashes
213
-
214
129
@staticmethod
215
130
def _optimize_tree (total_direct_nodes :int , num_pointers_per_node : int ) -> OptimizerResult :
216
131
to = TreeOptimizer (num_direct_nodes = total_direct_nodes ,
217
132
num_pointers = num_pointers_per_node )
133
+
134
+ # There are a few possible outputs from the tree optimizer. In general, we use
135
+ # this one, as it picks the tree that fits the data well (minimizes waste), and then
136
+ # from those picks one with minimum height.
218
137
return to .minimize_waste_min_height ()
0 commit comments