enable codspeed
update benchmarks

port ASV time benchmarks to pytest-benchmark, keep memory usage ones for now

simplify benchmarks

run codspeed for python

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

add tox installation
luizirber committed Jul 25, 2024
1 parent f621726 commit 19a0ccd
Showing 5 changed files with 226 additions and 126 deletions.
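The heart of the port is mechanical: each ASV `time_*` method on a suite class becomes a plain pytest function that receives the `benchmark` fixture, with the old `setup()` state moved into pytest fixtures. A minimal sketch of the pattern (illustrative names only, not lines taken from the diff below):

import pytest

# Before: ASV times any `time_*` method on a suite class; state lives on `self`.
class TimeSuite:
    def setup(self):
        self.data = list(range(1000))

    def time_sum(self):
        sum(self.data)


# After: pytest-benchmark times whatever callable the `benchmark` fixture is given;
# state comes from ordinary pytest fixtures.
@pytest.fixture
def data():
    return list(range(1000))


def test_sum(benchmark, data):
    benchmark(sum, data)
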
19 changes: 19 additions & 0 deletions .github/workflows/codspeed.yml
@@ -32,3 +32,22 @@ jobs:
        with:
          run: "cd src/core && cargo codspeed run"
          token: ${{ secrets.CODSPEED_TOKEN }}

  benchmarks-python:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox
      - name: Run benchmarks
        uses: CodSpeedHQ/action@v2
        with:
          token: ${{ secrets.CODSPEED_TOKEN }}
          run: tox -e codspeed
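The Python job delegates the actual benchmark run to a `codspeed` tox environment. That environment is defined in the fifth changed file, which is not expanded in this view; a hypothetical sketch (assumed, not taken from this commit) would install the CodSpeed pytest plugin and point it at the new benchmark tests:

# Hypothetical [testenv:codspeed] entry -- the real definition is in the file not shown here.
[testenv:codspeed]
deps =
    pytest-codspeed
commands =
    pytest --codspeed tests/test_benchmarks.py {posargs}
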
124 changes: 0 additions & 124 deletions benchmarks/benchmarks.py
@@ -33,74 +33,6 @@ def load_sequences():
    return sequences


class TimeMinHashSuite:
    def setup(self):
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
        self.protein_mh = MinHash(
            MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
        )
        self.sequences = load_sequences()

        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
        for seq in self.sequences:
            self.populated_mh.add_sequence(seq)

    def time_add_sequence(self):
        mh = self.mh
        sequences = self.sequences
        for seq in sequences:
            mh.add_sequence(seq)

    def time_add_protein(self):
        mh = self.protein_mh
        sequences = self.sequences
        for seq in sequences:
            mh.add_protein(seq)

    def time_get_mins(self):
        mh = self.populated_mh
        for i in range(GET_MINS_RANGE):
            mh.get_mins()

    def time_add_hash(self):
        mh = self.mh
        for i in range(ADD_HASH_RANGE):
            mh.add_hash(i)

    def time_add_many(self):
        mh = self.mh
        mh.add_many(list(range(ADD_MANY_RANGE)))

    def time_similarity(self):
        mh = self.mh
        other_mh = self.populated_mh
        for i in range(SIMILARITY_TIMES):
            mh.similarity(other_mh)

    def time_count_common(self):
        mh = self.mh
        other_mh = self.populated_mh
        for i in range(COUNT_COMMON_TIMES):
            mh.count_common(other_mh)

    def time_merge(self):
        mh = self.mh
        other_mh = self.populated_mh
        for i in range(MERGE_TIMES):
            mh.merge(other_mh)

    def time_copy(self):
        mh = self.populated_mh
        for i in range(COPY_TIMES):
            mh.__copy__()

    def time_concat(self):
        mh = self.mh
        other_mh = self.populated_mh
        for i in range(CONCAT_TIMES):
            mh += other_mh


class PeakmemMinHashSuite:
    def setup(self):
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
@@ -134,33 +66,6 @@ def peakmem_add_many(self):
####################


class TimeMinAbundanceSuite(TimeMinHashSuite):
    def setup(self):
        TimeMinHashSuite.setup(self)
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)

        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
        for seq in self.sequences:
            self.populated_mh.add_sequence(seq)

    def time_get_mins_abundance(self):
        mh = self.populated_mh
        for i in range(GET_MINS_RANGE):
            mh.get_mins(with_abundance=True)

    def time_set_abundances(self):
        mh = self.mh
        mins = self.populated_mh.get_mins(with_abundance=True)
        for i in range(SET_ABUNDANCES_RANGE):
            mh.set_abundances(mins)

    def time_set_abundances_noclear(self):
        mh = self.mh
        mins = self.populated_mh.get_mins(with_abundance=True)
        for i in range(SET_ABUNDANCES_RANGE):
            mh.set_abundances(mins, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
    def setup(self):
        PeakmemMinHashSuite.setup(self)
@@ -170,35 +75,6 @@ def setup(self):
####################


class TimeZipStorageSuite:
    def setup(self):
        import zipfile

        self.zipfile = NamedTemporaryFile()

        with zipfile.ZipFile(
            self.zipfile, mode="w", compression=zipfile.ZIP_STORED
        ) as storage:
            for i in range(ZIP_STORAGE_WRITE):
                # just so we have lots of entries
                storage.writestr(str(i), b"0")
            # one big-ish entry
            storage.writestr("sig1", b"9" * 1_000_000)

    def time_load_from_zipstorage(self):
        with ZipStorage(self.zipfile.name) as storage:
            for i in range(ZIP_STORAGE_LOAD):
                storage.load("sig1")

    def time_load_small_from_zipstorage(self):
        with ZipStorage(self.zipfile.name) as storage:
            for i in range(ZIP_STORAGE_LOAD):
                storage.load("99999")

    def teardown(self):
        self.zipfile.close()


class PeakmemZipStorageSuite:
    def setup(self):
        import zipfile
1 change: 1 addition & 0 deletions pyproject.toml
@@ -103,6 +103,7 @@ test = [
"pytest>=6.2.4,<8.4.0",
"pytest-cov>=4,<6.0",
"pytest-xdist>=3.1",
"pytest-benchmark>=4.0",
"pyyaml>=6,<7",
"diff-cover>=7.3",
"covdefaults>=2.2.2",
176 changes: 176 additions & 0 deletions tests/test_benchmarks.py
@@ -0,0 +1,176 @@
import random
from tempfile import NamedTemporaryFile

import pytest

from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

MINHASH_NUM = 500
MINHASH_K = 21

GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences():
    sequences = []
    for _ in range(10):
        random_seq = random.sample(
            "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
        )
        sequences.append("".join(random_seq))
    return sequences


@pytest.fixture
def mh():
    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)


@pytest.fixture
def mh_protein():
    return MinHash(MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False)


@pytest.fixture
def sequences():
    return load_sequences()


@pytest.fixture
def populated_mh(sequences):
    populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    for seq in sequences:
        populated_mh.add_sequence(seq)
    return populated_mh


def test_add_sequence(benchmark, mh, sequences):
    @benchmark
    def bench():
        for seq in sequences:
            mh.add_sequence(seq)


def test_add_protein(benchmark, mh_protein, sequences):
    @benchmark
    def bench():
        for seq in sequences:
            mh_protein.add_protein(seq)


def test_get_mins(benchmark, populated_mh):
    benchmark(populated_mh.get_mins)


def test_add_hash(benchmark, mh):
    @benchmark
    def bench():
        for i in range(ADD_HASH_RANGE):
            mh.add_hash(i)


def test_add_many(benchmark, mh):
    benchmark(mh.add_many, list(range(ADD_MANY_RANGE)))


def test_similarity(benchmark, mh, populated_mh):
    benchmark(mh.similarity, populated_mh)


def test_count_common(benchmark, mh, populated_mh):
    benchmark(mh.count_common, populated_mh)


def test_merge(benchmark, mh, populated_mh):
    benchmark(mh.merge, populated_mh)


def test_copy(benchmark, populated_mh):
    benchmark(populated_mh.__copy__)


def test_concat(benchmark, mh, populated_mh):
    benchmark(mh.__iadd__, populated_mh)


####################


# Abundance benchmarks, ported from the former ASV TimeMinAbundanceSuite to
# pytest-benchmark fixtures and tests.
@pytest.fixture
def mh_with_abundance():
    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


@pytest.fixture
def populated_mh_with_abundance(sequences):
    populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
    for seq in sequences:
        populated_mh.add_sequence(seq)
    return populated_mh


def test_get_mins_abundance(benchmark, populated_mh_with_abundance):
    benchmark(populated_mh_with_abundance.get_mins, with_abundance=True)


def test_set_abundances(benchmark, mh_with_abundance, populated_mh_with_abundance):
    mins = populated_mh_with_abundance.get_mins(with_abundance=True)
    benchmark(mh_with_abundance.set_abundances, mins)


def test_set_abundances_noclear(benchmark, mh_with_abundance, populated_mh_with_abundance):
    mins = populated_mh_with_abundance.get_mins(with_abundance=True)
    benchmark(mh_with_abundance.set_abundances, mins, clear=False)


####################


@pytest.fixture
def zipstore():
    import zipfile

    zf = NamedTemporaryFile()

    with zipfile.ZipFile(zf, mode="w", compression=zipfile.ZIP_STORED) as storage:
        for i in range(ZIP_STORAGE_WRITE):
            # just so we have lots of entries
            storage.writestr(str(i), b"0")
        # one big-ish entry
        storage.writestr("sig1", b"9" * 1_000_000)

    yield zf

    zf.close()


def test_load_from_zipstorage(benchmark, zipstore):
    @benchmark
    def bench():
        with ZipStorage(zipstore.name) as storage:
            for _ in range(ZIP_STORAGE_LOAD):
                storage.load("sig1")


def test_load_small_from_zipstorage(benchmark, zipstore):
    @benchmark
    def bench():
        with ZipStorage(zipstore.name) as storage:
            for _ in range(ZIP_STORAGE_LOAD):
                storage.load("99999")
(diff for the fifth changed file not expanded in this view)
