Skip to content

Commit f76268c

Browse files
committed
Add converters benchmark and add Bitarray column test for votable
1 parent 6a4e0f3 commit f76268c

File tree

2 files changed

+163
-43
lines changed

2 files changed

+163
-43
lines changed

benchmarks/votable.py

Lines changed: 127 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
"""Benchmarks for VOTable binary/binary2 parsing performance."""
22
import io
3-
import os
4-
import tempfile
5-
63
import numpy as np
74
from astropy.io.votable import parse, from_table
85
from astropy.table import Table
@@ -20,21 +17,33 @@
2017
id_data = np.arange(LARGE_SIZE, dtype=np.int64)
2118
flag_data = np.random.choice([True, False], LARGE_SIZE)
2219
quality_data = np.random.randint(0, 256, LARGE_SIZE, dtype=np.uint8)
20+
bool_data = np.random.randint(0, 2, LARGE_SIZE).astype(bool)
2321

2422
short_names = np.array([f"OBJ_{i:08d}" for i in range(LARGE_SIZE)])
25-
filter_names = np.random.choice(['u', 'g', 'r', 'i', 'z', 'Y'], LARGE_SIZE)
23+
filter_names = np.random.choice(["u", "g", "r", "i", "z", "Y"], LARGE_SIZE)
2624
classifications = np.random.choice(
27-
['STAR', 'GALAXY', 'QSO', 'UNKNOWN'], LARGE_SIZE
25+
["STAR", "GALAXY", "QSO", "UNKNOWN"], LARGE_SIZE)
26+
long_descriptions = np.array(
27+
[
28+
f"Extend description about a field {i // 1000:04d}"
29+
for i in range(LARGE_SIZE)
30+
]
2831
)
29-
long_descriptions = np.array([
30-
f"Extend description about a field {i//1000:04d}"
31-
for i in range(LARGE_SIZE)
32-
])
3332

3433

35-
def create_votable_bytes(table_data, format_type='binary2'):
34+
def create_votable_bytes(
    table_data,
    format_type="binary2",
    bitarray_size=None,
):
    """Serialize ``table_data`` as a VOTable and return the written bytes.

    Parameters
    ----------
    table_data
        Object handed to ``from_table`` to build the VOTable.
    format_type : str
        Forwarded to ``to_xml`` as ``tabledata_format``; the benchmarks in
        this file pass ``"binary"`` or ``"binary2"``.
    bitarray_size : optional
        When not ``None``, every field of the first table whose ``datatype``
        equals ``"bit"`` has its ``arraysize`` replaced by
        ``str(bitarray_size)``. Fields with any other datatype are left
        untouched, so the argument has no effect on tables without bit
        fields.

    Returns
    -------
    bytes
        The full content ``to_xml`` wrote into an in-memory buffer.
    """
    vot = from_table(table_data)

    if bitarray_size is not None:
        # Lazily pick out only the bit-typed fields of the first table.
        bit_fields = (
            fld
            for fld in vot.get_first_table().fields
            if fld.datatype == "bit"
        )
        for fld in bit_fields:
            fld.arraysize = str(bitarray_size)

    buffer = io.BytesIO()
    vot.to_xml(buffer, tabledata_format=format_type)
    return buffer.getvalue()
@@ -52,13 +61,15 @@ def setup(self):
5261
flux_data[:LARGE_SIZE],
5362
count_data[:LARGE_SIZE],
5463
id_data[:LARGE_SIZE],
55-
quality_data[:LARGE_SIZE]
64+
quality_data[:LARGE_SIZE],
5665
],
57-
names=['ra', 'dec', 'mag', 'flux', 'counts', 'id', 'quality']
66+
names=["ra", "dec", "mag", "flux", "counts", "id", "quality"],
5867
)
5968

60-
self.binary_data = create_votable_bytes(table, 'binary')
61-
self.binary2_data = create_votable_bytes(table, 'binary2')
69+
self.binary_data = create_votable_bytes(
70+
table, "binary", bitarray_size=8)
71+
self.binary2_data = create_votable_bytes(
72+
table, "binary2", bitarray_size=8)
6273

6374
def time_numeric_binary(self):
6475
parse(io.BytesIO(self.binary_data))
@@ -78,13 +89,13 @@ def setup(self):
7889
short_names[:LARGE_SIZE],
7990
filter_names[:LARGE_SIZE],
8091
classifications[:LARGE_SIZE],
81-
mag_data[:LARGE_SIZE]
92+
mag_data[:LARGE_SIZE],
8293
],
83-
names=['ra', 'dec', 'object_id', 'filter', 'class', 'mag']
94+
names=["ra", "dec", "object_id", "filter", "class", "mag"],
8495
)
8596

86-
self.binary_data = create_votable_bytes(table, 'binary')
87-
self.binary2_data = create_votable_bytes(table, 'binary2')
97+
self.binary_data = create_votable_bytes(table, "binary")
98+
self.binary2_data = create_votable_bytes(table, "binary2")
8899

89100
def time_short_strings_binary(self):
90101
parse(io.BytesIO(self.binary_data))
@@ -102,13 +113,13 @@ def setup(self):
102113
ra_data[:LARGE_SIZE],
103114
dec_data[:LARGE_SIZE],
104115
long_descriptions[:LARGE_SIZE],
105-
mag_data[:LARGE_SIZE]
116+
mag_data[:LARGE_SIZE],
106117
],
107-
names=['ra', 'dec', 'description', 'mag']
118+
names=["ra", "dec", "description", "mag"],
108119
)
109120

110-
self.binary_data = create_votable_bytes(table, 'binary')
111-
self.binary2_data = create_votable_bytes(table, 'binary2')
121+
self.binary_data = create_votable_bytes(table, "binary")
122+
self.binary2_data = create_votable_bytes(table, "binary2")
112123

113124
def time_long_strings_binary(self):
114125
parse(io.BytesIO(self.binary_data))
@@ -126,19 +137,25 @@ def setup(self):
126137
short_names[:LARGE_SIZE],
127138
filter_names[:LARGE_SIZE],
128139
classifications[:LARGE_SIZE],
129-
np.random.choice(['A', 'B', 'C', 'D'], LARGE_SIZE),
130-
np.random.choice(['HIGH', 'MED', 'LOW'], LARGE_SIZE),
140+
np.random.choice(["A", "B", "C", "D"], LARGE_SIZE),
141+
np.random.choice(["HIGH", "MED", "LOW"], LARGE_SIZE),
131142
long_descriptions[:LARGE_SIZE],
132143
ra_data[:LARGE_SIZE],
133-
dec_data[:LARGE_SIZE]
144+
dec_data[:LARGE_SIZE],
134145
],
135146
names=[
136-
'id', 'filter', 'class', 'grade',
137-
'priority', 'desc', 'ra', 'dec'
138-
]
147+
"id",
148+
"filter",
149+
"class",
150+
"grade",
151+
"priority",
152+
"desc",
153+
"ra",
154+
"dec",
155+
],
139156
)
140157

141-
self.binary2_data = create_votable_bytes(table, 'binary2')
158+
self.binary2_data = create_votable_bytes(table, "binary2")
142159

143160
def time_string_intensive_binary2(self):
144161
parse(io.BytesIO(self.binary2_data))
@@ -162,13 +179,21 @@ def setup(self):
162179
np.random.choice([True, False], LARGE_SIZE),
163180
],
164181
names=[
165-
'ra', 'dec', 'saturated', 'flagged', 'edge_pixel',
166-
'cosmic_ray', 'variable', 'extended', 'public', 'calibrated'
167-
]
182+
"ra",
183+
"dec",
184+
"saturated",
185+
"flagged",
186+
"edge_pixel",
187+
"cosmic_ray",
188+
"variable",
189+
"extended",
190+
"public",
191+
"calibrated",
192+
],
168193
)
169194

170-
self.binary_data = create_votable_bytes(table, 'binary')
171-
self.binary2_data = create_votable_bytes(table, 'binary2')
195+
self.binary_data = create_votable_bytes(table, "binary")
196+
self.binary2_data = create_votable_bytes(table, "binary2")
172197

173198
def time_booleans_binary(self):
174199
parse(io.BytesIO(self.binary_data))
@@ -177,6 +202,57 @@ def time_booleans_binary2(self):
177202
parse(io.BytesIO(self.binary2_data))
178203

179204

205+
class TimeVOTableBitArrayOptimization:
    """Benchmark BitArray columns in Binary/Binary2 VOTables.

    ``setup`` serializes one table four ways (``binary`` / ``binary2``
    crossed with a ``bitarray_size`` of 8 / 16); each ``time_*`` method
    parses one of those payloads from an in-memory buffer.
    """

    def setup(self):
        """Build the source table and the four serialized payloads."""
        flag_names = ["detected", "saturated", "edge_pixel", "cosmic_ray"]
        # One independent random boolean column per flag, drawn in order.
        flag_columns = [
            np.random.randint(0, 2, LARGE_SIZE).astype(bool)
            for _ in flag_names
        ]

        table = Table(
            [
                ra_data[:LARGE_SIZE],
                dec_data[:LARGE_SIZE],
                mag_data[:LARGE_SIZE],
                *flag_columns,
            ],
            names=["ra", "dec", "mag", *flag_names],
        )

        # NOTE(review): create_votable_bytes only rewrites ``arraysize`` on
        # fields whose datatype is "bit". Confirm that from_table yields
        # such fields for these boolean columns; if it does not, the 8- and
        # 16-bit payloads below are identical.
        #
        # bitarray_size is passed by keyword as an int, matching the other
        # create_votable_bytes call sites in this file; the helper applies
        # str() itself, so the serialized output is unchanged.
        self.binary_bitarray_8_data = create_votable_bytes(
            table, "binary", bitarray_size=8
        )
        self.binary_bitarray_16_data = create_votable_bytes(
            table, "binary", bitarray_size=16
        )
        self.binary2_bitarray_8_data = create_votable_bytes(
            table, "binary2", bitarray_size=8
        )
        self.binary2_bitarray_16_data = create_votable_bytes(
            table, "binary2", bitarray_size=16
        )

    def time_bitarray_8bit_binary(self):
        """Parse the binary payload built with bitarray_size=8."""
        parse(io.BytesIO(self.binary_bitarray_8_data))

    def time_bitarray_16bit_binary(self):
        """Parse the binary payload built with bitarray_size=16."""
        parse(io.BytesIO(self.binary_bitarray_16_data))

    def time_bitarray_8bit_binary2(self):
        """Parse the binary2 payload built with bitarray_size=8."""
        parse(io.BytesIO(self.binary2_bitarray_8_data))

    def time_bitarray_16bit_binary2(self):
        """Parse the binary2 payload built with bitarray_size=16."""
        parse(io.BytesIO(self.binary2_bitarray_16_data))
254+
255+
180256
class TimeVOTableMixed:
181257
"""Benchmark for a table with mixed fields types."""
182258

@@ -195,13 +271,21 @@ def setup(self):
195271
flag_data[:LARGE_SIZE],
196272
],
197273
names=[
198-
'ra', 'dec', 'id', 'mag', 'flux',
199-
'filter', 'class', 'counts', 'quality', 'detected'
200-
]
274+
"ra",
275+
"dec",
276+
"id",
277+
"mag",
278+
"flux",
279+
"filter",
280+
"class",
281+
"counts",
282+
"quality",
283+
"detected",
284+
],
201285
)
202286

203-
self.binary_data = create_votable_bytes(table, 'binary')
204-
self.binary2_data = create_votable_bytes(table, 'binary2')
287+
self.binary_data = create_votable_bytes(table, "binary")
288+
self.binary2_data = create_votable_bytes(table, "binary2")
205289

206290
def time_mixed_binary(self):
207291
parse(io.BytesIO(self.binary_data))
@@ -218,13 +302,13 @@ def setup(self):
218302
[
219303
ra_data[:SMALL_SIZE],
220304
dec_data[:SMALL_SIZE],
221-
mag_data[:SMALL_SIZE]
305+
mag_data[:SMALL_SIZE],
222306
],
223-
names=['ra', 'dec', 'mag']
307+
names=["ra", "dec", "mag"],
224308
)
225309

226-
self.binary_data = create_votable_bytes(table, 'binary')
227-
self.binary2_data = create_votable_bytes(table, 'binary2')
310+
self.binary_data = create_votable_bytes(table, "binary")
311+
self.binary2_data = create_votable_bytes(table, "binary2")
228312

229313
def time_small_binary(self):
230314
parse(io.BytesIO(self.binary_data))

benchmarks/votable_converters.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import numpy as np
2+
import numpy.ma as ma
3+
from astropy.io.votable.converters import bool_to_bitarray, bitarray_to_bool
4+
5+
# Element counts for the "small" and "large" benchmark inputs.
SMALL_SIZE = 1000
LARGE_SIZE = 100000


class TimeBitArrayConverters:
    """Direct converter function benchmarks.

    Times ``bool_to_bitarray`` and ``bitarray_to_bool`` on their own,
    without going through VOTable parsing.
    """

    def setup(self):
        """Create random boolean inputs and their converted bit arrays."""
        small = np.random.randint(0, 2, SMALL_SIZE).astype(bool)
        large = np.random.randint(0, 2, LARGE_SIZE).astype(bool)
        # An element is masked when its random draw falls below 0.2.
        hidden = np.random.random(LARGE_SIZE) < 0.2

        self.small_bool = small
        self.large_bool = large
        self.masked_bool = ma.array(large, mask=hidden)

        # Pre-converted inputs for the bitarray_to_bool timings.
        self.small_bits = bool_to_bitarray(small)
        self.large_bits = bool_to_bitarray(large)

    def time_bool_to_bitarray_small(self):
        """Convert the SMALL_SIZE boolean array."""
        bool_to_bitarray(self.small_bool)

    def time_bool_to_bitarray_large(self):
        """Convert the LARGE_SIZE boolean array."""
        bool_to_bitarray(self.large_bool)

    def time_bool_to_bitarray_masked(self):
        """Convert the LARGE_SIZE masked boolean array."""
        bool_to_bitarray(self.masked_bool)

    def time_bitarray_to_bool_small(self):
        """Convert the small bit array back to booleans."""
        length = len(self.small_bool)
        bitarray_to_bool(self.small_bits, length)

    def time_bitarray_to_bool_large(self):
        """Convert the large bit array back to booleans."""
        length = len(self.large_bool)
        bitarray_to_bool(self.large_bits, length)

0 commit comments

Comments
 (0)