1
1
"""Benchmarks for VOTable binary/binary2 parsing performance."""
2
2
import io
3
- import os
4
- import tempfile
5
-
6
3
import numpy as np
7
4
from astropy .io .votable import parse , from_table
8
5
from astropy .table import Table
20
17
id_data = np .arange (LARGE_SIZE , dtype = np .int64 )
21
18
flag_data = np .random .choice ([True , False ], LARGE_SIZE )
22
19
quality_data = np .random .randint (0 , 256 , LARGE_SIZE , dtype = np .uint8 )
20
+ bool_data = np .random .randint (0 , 2 , LARGE_SIZE ).astype (bool )
23
21
24
22
short_names = np .array ([f"OBJ_{ i :08d} " for i in range (LARGE_SIZE )])
25
- filter_names = np .random .choice (['u' , 'g' , 'r' , 'i' , 'z' , 'Y' ], LARGE_SIZE )
23
+ filter_names = np .random .choice (["u" , "g" , "r" , "i" , "z" , "Y" ], LARGE_SIZE )
26
24
classifications = np .random .choice (
27
- ['STAR' , 'GALAXY' , 'QSO' , 'UNKNOWN' ], LARGE_SIZE
25
+ ["STAR" , "GALAXY" , "QSO" , "UNKNOWN" ], LARGE_SIZE )
26
+ long_descriptions = np .array (
27
+ [
28
+ f"Extend description about a field { i // 1000 :04d} "
29
+ for i in range (LARGE_SIZE )
30
+ ]
28
31
)
29
- long_descriptions = np .array ([
30
- f"Extend description about a field { i // 1000 :04d} "
31
- for i in range (LARGE_SIZE )
32
- ])
33
32
34
33
35
def create_votable_bytes(table_data, format_type="binary2", bitarray_size=None):
    """Serialize a table to VOTable XML and return the result as bytes.

    Parameters
    ----------
    table_data
        Table object handed to ``from_table`` to build the VOTable.
    format_type : str, optional
        Value forwarded to ``to_xml`` as ``tabledata_format``.
    bitarray_size : optional
        When not ``None``, ``str(bitarray_size)`` is assigned as the
        ``arraysize`` of every field in the first table whose ``datatype``
        equals ``"bit"``.

    Returns
    -------
    bytes
        Contents of the in-memory buffer the VOTable was written to.
    """
    votable = from_table(table_data)

    if bitarray_size is not None:
        # NOTE(review): this only touches fields already typed "bit"; boolean
        # columns converted by ``from_table`` may be typed "boolean" instead,
        # in which case nothing is changed here -- confirm against astropy.
        bit_fields = [
            candidate
            for candidate in votable.get_first_table().fields
            if candidate.datatype == "bit"
        ]
        for bit_field in bit_fields:
            bit_field.arraysize = str(bitarray_size)

    buffer = io.BytesIO()
    votable.to_xml(buffer, tabledata_format=format_type)
    return buffer.getvalue()
@@ -52,13 +61,15 @@ def setup(self):
52
61
flux_data [:LARGE_SIZE ],
53
62
count_data [:LARGE_SIZE ],
54
63
id_data [:LARGE_SIZE ],
55
- quality_data [:LARGE_SIZE ]
64
+ quality_data [:LARGE_SIZE ],
56
65
],
57
- names = ['ra' , ' dec' , ' mag' , ' flux' , ' counts' , 'id' , ' quality' ]
66
+ names = ["ra" , " dec" , " mag" , " flux" , " counts" , "id" , " quality" ],
58
67
)
59
68
60
- self .binary_data = create_votable_bytes (table , 'binary' )
61
- self .binary2_data = create_votable_bytes (table , 'binary2' )
69
+ self .binary_data = create_votable_bytes (
70
+ table , "binary" , bitarray_size = 8 )
71
+ self .binary2_data = create_votable_bytes (
72
+ table , "binary2" , bitarray_size = 8 )
62
73
63
74
def time_numeric_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
@@ -78,13 +89,13 @@ def setup(self):
78
89
short_names [:LARGE_SIZE ],
79
90
filter_names [:LARGE_SIZE ],
80
91
classifications [:LARGE_SIZE ],
81
- mag_data [:LARGE_SIZE ]
92
+ mag_data [:LARGE_SIZE ],
82
93
],
83
- names = ['ra' , ' dec' , ' object_id' , ' filter' , ' class' , ' mag' ]
94
+ names = ["ra" , " dec" , " object_id" , " filter" , " class" , " mag" ],
84
95
)
85
96
86
- self .binary_data = create_votable_bytes (table , ' binary' )
87
- self .binary2_data = create_votable_bytes (table , ' binary2' )
97
+ self .binary_data = create_votable_bytes (table , " binary" )
98
+ self .binary2_data = create_votable_bytes (table , " binary2" )
88
99
89
100
def time_short_strings_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
@@ -102,13 +113,13 @@ def setup(self):
102
113
ra_data [:LARGE_SIZE ],
103
114
dec_data [:LARGE_SIZE ],
104
115
long_descriptions [:LARGE_SIZE ],
105
- mag_data [:LARGE_SIZE ]
116
+ mag_data [:LARGE_SIZE ],
106
117
],
107
- names = ['ra' , ' dec' , ' description' , ' mag' ]
118
+ names = ["ra" , " dec" , " description" , " mag" ],
108
119
)
109
120
110
- self .binary_data = create_votable_bytes (table , ' binary' )
111
- self .binary2_data = create_votable_bytes (table , ' binary2' )
121
+ self .binary_data = create_votable_bytes (table , " binary" )
122
+ self .binary2_data = create_votable_bytes (table , " binary2" )
112
123
113
124
def time_long_strings_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
@@ -126,19 +137,25 @@ def setup(self):
126
137
short_names [:LARGE_SIZE ],
127
138
filter_names [:LARGE_SIZE ],
128
139
classifications [:LARGE_SIZE ],
129
- np .random .choice (['A' , 'B' , 'C' , 'D' ], LARGE_SIZE ),
130
- np .random .choice ([' HIGH' , ' MED' , ' LOW' ], LARGE_SIZE ),
140
+ np .random .choice (["A" , "B" , "C" , "D" ], LARGE_SIZE ),
141
+ np .random .choice ([" HIGH" , " MED" , " LOW" ], LARGE_SIZE ),
131
142
long_descriptions [:LARGE_SIZE ],
132
143
ra_data [:LARGE_SIZE ],
133
- dec_data [:LARGE_SIZE ]
144
+ dec_data [:LARGE_SIZE ],
134
145
],
135
146
names = [
136
- 'id' , 'filter' , 'class' , 'grade' ,
137
- 'priority' , 'desc' , 'ra' , 'dec'
138
- ]
147
+ "id" ,
148
+ "filter" ,
149
+ "class" ,
150
+ "grade" ,
151
+ "priority" ,
152
+ "desc" ,
153
+ "ra" ,
154
+ "dec" ,
155
+ ],
139
156
)
140
157
141
- self .binary2_data = create_votable_bytes (table , ' binary2' )
158
+ self .binary2_data = create_votable_bytes (table , " binary2" )
142
159
143
160
def time_string_intensive_binary2(self):
    """Parse the payload held in ``self.binary2_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary2_data)
    parse(stream)
@@ -162,13 +179,21 @@ def setup(self):
162
179
np .random .choice ([True , False ], LARGE_SIZE ),
163
180
],
164
181
names = [
165
- 'ra' , 'dec' , 'saturated' , 'flagged' , 'edge_pixel' ,
166
- 'cosmic_ray' , 'variable' , 'extended' , 'public' , 'calibrated'
167
- ]
182
+ "ra" ,
183
+ "dec" ,
184
+ "saturated" ,
185
+ "flagged" ,
186
+ "edge_pixel" ,
187
+ "cosmic_ray" ,
188
+ "variable" ,
189
+ "extended" ,
190
+ "public" ,
191
+ "calibrated" ,
192
+ ],
168
193
)
169
194
170
- self .binary_data = create_votable_bytes (table , ' binary' )
171
- self .binary2_data = create_votable_bytes (table , ' binary2' )
195
+ self .binary_data = create_votable_bytes (table , " binary" )
196
+ self .binary2_data = create_votable_bytes (table , " binary2" )
172
197
173
198
def time_booleans_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
@@ -177,6 +202,57 @@ def time_booleans_binary2(self):
177
202
parse (io .BytesIO (self .binary2_data ))
178
203
179
204
205
class TimeVOTableBitArrayOptimization:
    """Benchmark BitArray columns in Binary/Binary2 VOTables.

    ``setup`` serializes one table four ways (``binary``/``binary2`` crossed
    with a requested bit-array size of 8 or 16); each ``time_*`` method then
    parses one of those payloads from an in-memory stream.

    NOTE(review): the four flag columns are built as boolean arrays, while
    ``create_votable_bytes`` only overrides ``arraysize`` on fields whose
    datatype is ``"bit"``. Confirm that the serialized tables really contain
    bit-array fields; otherwise the 8 and 16 variants are identical.
    """

    def setup(self):
        """Build the source table and pre-serialize every benchmarked variant."""
        flag_names = ["detected", "saturated", "edge_pixel", "cosmic_ray"]

        columns = [
            ra_data[:LARGE_SIZE],
            dec_data[:LARGE_SIZE],
            mag_data[:LARGE_SIZE],
        ]
        # One independent random boolean column per flag name.
        columns.extend(
            np.random.randint(0, 2, LARGE_SIZE).astype(bool) for _ in flag_names
        )

        table = Table(columns, names=["ra", "dec", "mag", *flag_names])

        # Pass the size by keyword as an integer; the helper converts it with
        # ``str()``, so the arraysize text is the same as for "8" / "16".
        self.binary_bitarray_8_data = create_votable_bytes(
            table, "binary", bitarray_size=8
        )
        self.binary_bitarray_16_data = create_votable_bytes(
            table, "binary", bitarray_size=16
        )
        self.binary2_bitarray_8_data = create_votable_bytes(
            table, "binary2", bitarray_size=8
        )
        self.binary2_bitarray_16_data = create_votable_bytes(
            table, "binary2", bitarray_size=16
        )

    def time_bitarray_8bit_binary(self):
        """Parse BitArray with 8-bit arraysize."""
        parse(io.BytesIO(self.binary_bitarray_8_data))

    def time_bitarray_16bit_binary(self):
        """Parse BitArray with 16-bit arraysize."""
        parse(io.BytesIO(self.binary_bitarray_16_data))

    def time_bitarray_8bit_binary2(self):
        """Parse binary2 BitArray with 8-bit arraysize."""
        parse(io.BytesIO(self.binary2_bitarray_8_data))

    def time_bitarray_16bit_binary2(self):
        """Parse binary2 BitArray with 16-bit arraysize."""
        parse(io.BytesIO(self.binary2_bitarray_16_data))
254
+
255
+
180
256
class TimeVOTableMixed :
181
257
"""Benchmark for a table with mixed fields types."""
182
258
@@ -195,13 +271,21 @@ def setup(self):
195
271
flag_data [:LARGE_SIZE ],
196
272
],
197
273
names = [
198
- 'ra' , 'dec' , 'id' , 'mag' , 'flux' ,
199
- 'filter' , 'class' , 'counts' , 'quality' , 'detected'
200
- ]
274
+ "ra" ,
275
+ "dec" ,
276
+ "id" ,
277
+ "mag" ,
278
+ "flux" ,
279
+ "filter" ,
280
+ "class" ,
281
+ "counts" ,
282
+ "quality" ,
283
+ "detected" ,
284
+ ],
201
285
)
202
286
203
- self .binary_data = create_votable_bytes (table , ' binary' )
204
- self .binary2_data = create_votable_bytes (table , ' binary2' )
287
+ self .binary_data = create_votable_bytes (table , " binary" )
288
+ self .binary2_data = create_votable_bytes (table , " binary2" )
205
289
206
290
def time_mixed_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
@@ -218,13 +302,13 @@ def setup(self):
218
302
[
219
303
ra_data [:SMALL_SIZE ],
220
304
dec_data [:SMALL_SIZE ],
221
- mag_data [:SMALL_SIZE ]
305
+ mag_data [:SMALL_SIZE ],
222
306
],
223
- names = ['ra' , ' dec' , ' mag' ]
307
+ names = ["ra" , " dec" , " mag" ],
224
308
)
225
309
226
- self .binary_data = create_votable_bytes (table , ' binary' )
227
- self .binary2_data = create_votable_bytes (table , ' binary2' )
310
+ self .binary_data = create_votable_bytes (table , " binary" )
311
+ self .binary2_data = create_votable_bytes (table , " binary2" )
228
312
229
313
def time_small_binary(self):
    """Parse the payload held in ``self.binary_data`` from an in-memory stream."""
    stream = io.BytesIO(self.binary_data)
    parse(stream)
0 commit comments