Skip to content

Add converters benchmark and add Bitarray column test for votable #142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions benchmarks/votable.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
"""Benchmarks for VOTable binary/binary2 parsing performance."""
import io
import os
import tempfile

import numpy as np
from astropy.io.votable import parse, from_table
from astropy.table import Table

np.random.seed(42)
rng = np.random.default_rng(42)

SMALL_SIZE = 1000
LARGE_SIZE = 200000
Expand All @@ -20,6 +18,7 @@
id_data = np.arange(LARGE_SIZE, dtype=np.int64)
flag_data = np.random.choice([True, False], LARGE_SIZE)
quality_data = np.random.randint(0, 256, LARGE_SIZE, dtype=np.uint8)
bool_data = rng.integers(0, 2, LARGE_SIZE, dtype=bool)

short_names = np.array([f"OBJ_{i:08d}" for i in range(LARGE_SIZE)])
filter_names = np.random.choice(['u', 'g', 'r', 'i', 'z', 'Y'], LARGE_SIZE)
Expand All @@ -32,9 +31,18 @@
])


def create_votable_bytes(table_data, format_type='binary2'):
def create_votable_bytes(
        table_data,
        format_type="binary2",
        bitarray_size=None):
    """Serialize ``table_data`` to VOTable XML and return the raw bytes.

    Parameters
    ----------
    table_data
        Table handed to ``from_table`` to build the VOTable.
    format_type
        Value forwarded to ``to_xml`` as ``tabledata_format``.
    bitarray_size
        When not ``None``, ``str(bitarray_size)`` is assigned as the
        ``arraysize`` of every field of the first table whose datatype
        is ``"bit"``.

    Returns
    -------
    bytes
        The full content written by ``to_xml``.
    """
    vot = from_table(table_data)

    if bitarray_size is not None:
        size_text = str(bitarray_size)
        # NOTE(review): from_table may emit boolean columns with datatype
        # "boolean" rather than "bit", in which case this override matches
        # nothing -- confirm against the fields actually produced.
        bit_fields = [
            candidate
            for candidate in vot.get_first_table().fields
            if candidate.datatype == "bit"
        ]
        for bit_field in bit_fields:
            bit_field.arraysize = size_text

    with io.BytesIO() as buffer:
        vot.to_xml(buffer, tabledata_format=format_type)
        return buffer.getvalue()
Expand All @@ -57,8 +65,10 @@ def setup(self):
names=['ra', 'dec', 'mag', 'flux', 'counts', 'id', 'quality']
)

self.binary_data = create_votable_bytes(table, 'binary')
self.binary2_data = create_votable_bytes(table, 'binary2')
self.binary_data = create_votable_bytes(
table, "binary", bitarray_size=8)
self.binary2_data = create_votable_bytes(
table, "binary2", bitarray_size=8)

def time_numeric_binary(self):
    """Time parsing the ``binary`` serialization prepared in ``setup``."""
    payload = io.BytesIO(self.binary_data)
    parse(payload)
Expand Down Expand Up @@ -177,6 +187,48 @@ def time_booleans_binary2(self):
parse(io.BytesIO(self.binary2_data))


class TimeVOTableBitArrayOptimization:
"""Benchmark BitArray columns in Binary/Binary2 VOTables."""

def setup(self):
table = Table(
{
"ra": ra_data[:LARGE_SIZE],
"dec": dec_data[:LARGE_SIZE],
"mag": mag_data[:LARGE_SIZE],
"detected": rng.integers(0, 2, LARGE_SIZE).astype(bool),
"saturated": rng.integers(0, 2, LARGE_SIZE).astype(bool),
"edge_pixel": rng.integers(0, 2, LARGE_SIZE).astype(bool),
"cosmic_ray": rng.integers(0, 2, LARGE_SIZE).astype(bool),
}
)

self.binary_bitarray_8_data = create_votable_bytes(
table, "binary", "8")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's some new code here that hasn't been formatted with Ruff. New code should be formatted with Ruff so that, when Ruff is adopted in this repository, the patch to update the formatting will be smaller.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't the core library. We never even discussed PEP 8 here, much less Ruff. So I wouldn't suddenly enforce that now as a rule that would block a merge here.

self.binary_bitarray_16_data = create_votable_bytes(
table, "binary", "16")
self.binary2_bitarray_8_data = create_votable_bytes(
table, "binary2", "8")
self.binary2_bitarray_16_data = create_votable_bytes(
table, "binary2", "16")

def time_bitarray_8bit_binary(self):
    """Time parsing the ``binary`` payload created with arraysize "8"."""
    payload = io.BytesIO(self.binary_bitarray_8_data)
    parse(payload)

def time_bitarray_16bit_binary(self):
    """Time parsing the ``binary`` payload created with arraysize "16"."""
    payload = io.BytesIO(self.binary_bitarray_16_data)
    parse(payload)

def time_bitarray_8bit_binary2(self):
    """Time parsing the ``binary2`` payload created with arraysize "8"."""
    payload = io.BytesIO(self.binary2_bitarray_8_data)
    parse(payload)

def time_bitarray_16bit_binary2(self):
    """Time parsing the ``binary2`` payload created with arraysize "16"."""
    payload = io.BytesIO(self.binary2_bitarray_16_data)
    parse(payload)


class TimeVOTableMixed:
"""Benchmark for a table with mixed fields types."""

Expand Down
37 changes: 37 additions & 0 deletions benchmarks/votable_converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import numpy.ma as ma
from astropy.io.votable.converters import bool_to_bitarray, bitarray_to_bool

# Element counts for the small and large converter benchmark inputs.
SMALL_SIZE = 1_000
LARGE_SIZE = 100_000


class TimeBitArrayConverters:
    """Time ``bool_to_bitarray`` and ``bitarray_to_bool`` called directly.

    Every ``time_*`` method issues a single converter call on data that
    ``setup`` prepared beforehand.
    """

    def setup(self):
        """Create the boolean inputs and their converted counterparts."""
        generator = np.random.default_rng(42)

        # Draw order (small, large, mask) is kept fixed so the seeded
        # stream yields the same arrays on every run.
        self.small_bool = generator.integers(0, 2, SMALL_SIZE, dtype=bool)
        self.large_bool = generator.integers(0, 2, LARGE_SIZE, dtype=bool)

        # Mask an element wherever a uniform draw falls below 0.2.
        hidden = generator.random(LARGE_SIZE) < 0.2
        self.masked_bool = ma.array(self.large_bool, mask=hidden)

        self.small_bits = bool_to_bitarray(self.small_bool)
        self.large_bits = bool_to_bitarray(self.large_bool)

    def time_bool_to_bitarray_small(self):
        """Convert the small plain boolean array."""
        values = self.small_bool
        bool_to_bitarray(values)

    def time_bool_to_bitarray_large(self):
        """Convert the large plain boolean array."""
        values = self.large_bool
        bool_to_bitarray(values)

    def time_bool_to_bitarray_masked(self):
        """Convert the large masked boolean array."""
        values = self.masked_bool
        bool_to_bitarray(values)

    def time_bitarray_to_bool_small(self):
        """Convert the small bit data back, passing the original length."""
        count = len(self.small_bool)
        bitarray_to_bool(self.small_bits, count)

    def time_bitarray_to_bool_large(self):
        """Convert the large bit data back, passing the original length."""
        count = len(self.large_bool)
        bitarray_to_bool(self.large_bits, count)