Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/efs_distributed/measurements/measurementManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from aiocache.serializers import JsonSerializer
import pandas as pd
from .measurementClass import DbMeasurement, FileMeasurement, ComputedMeasurement
from efs_trackhub import TrackHub
from efs_parser import GtfParsedFile, TbxFile, BigBed
import ujson
import requests
Expand Down Expand Up @@ -267,7 +266,7 @@ def import_files(self, fileSource, fileHandler=None, genome=None):
json_string = f.read()

records = ujson.loads(json_string)
self.import_records(records, fileHandler=fileHandler, genome=genome)
return self.import_records(records, fileHandler=fileHandler, genome=genome)

def import_records(self, records, fileHandler=None, genome=None):
"""Import measurements from a list of records (usually from a decoded json string)
Expand Down
63 changes: 63 additions & 0 deletions tests/test_distributed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pytest
import os
from efs_distributed.handler import FileHandlerProcess
from efs_distributed.measurements import MeasurementManager
from distributed import Client
import asyncio
import sys
# Two remote bigwig measurement records (hg19, marker H3K27me3) used as the
# shared input for the tests in this module.
records = [
    {
        'url': 'https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E079-H3K27me3.fc.signal.bigwig',
        'file_type': 'bigwig',
        'datatype': 'bp',
        'name': 'E079-H3K27me3',
        'id': 'E079-H3K27me3',
        'genome': 'hg19',
        'annotation': {'group': 'digestive', 'tissue': 'Esophagus', 'marker': 'H3K27me3'},
    },
    {
        'url': 'https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E106-H3K27me3.fc.signal.bigwig',
        'file_type': 'bigwig',
        'datatype': 'bp',
        'name': 'E106-H3K27me3',
        'id': 'E106-H3K27me3',
        'genome': 'hg19',
        'annotation': {'group': 'digestive', 'tissue': 'Sigmoid Colon', 'marker': 'H3K27me3'},
    },
]

# Module-level fixtures: built once at import time and read by the tests below.
mMgr = MeasurementManager()
fms = mMgr.import_records(records, genome="hg19")

# Query the first measurement for chr1:1000000-1002000 summarised into 100 bins.
# The first element of the result is the table the tests inspect
# (presumably a pandas DataFrame -- it is used via .columns / .iterrows()).
res = asyncio.run(fms[0].get_data("chr1", 1000000, 1002000, bins=100))
df = res[0]
def test_columns():
    """Check that the fetched table has exactly the expected column labels, in order."""
    expected = ['start', 'end', 'E079-H3K27me3']
    assert df.columns.tolist() == expected

def test_range():
    """Check that every returned row overlaps the queried interval.

    The module-level ``df`` was fetched for chr1:1000000-1002000, so each
    row's [start, end] span must intersect that window.
    """
    start = 1000000
    end = 1002000
    for _, row in df.iterrows():
        # Two intervals overlap only when the row begins before the query
        # ends AND finishes after the query begins. The previous `or` was
        # satisfied by practically any row, so the test could never fail.
        assert row['start'] <= end and row['end'] >= start


def test_get_bytes():
mMgr = MeasurementManager()
fms = mMgr.import_records(records, genome="hg19")
Comment on lines +34 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be cumbersome to create a manager and import the records every time inside each test. We should probably reuse the same objects from lines 17-18.

res , err = asyncio.run(fms[0].get_byteRanges("chr1", 1000000, 1002000, bins=100))
df = res
size = sys.getsizeof(df)
assert size == 10727

def test_bin_rows():
    """Check that requesting 100 bins yields exactly 100 rows.

    Reuses the module-level ``df`` (fetched with ``bins=100`` for
    chr1:1000000-1002000) rather than building a new manager, re-importing
    the records and repeating the identical remote query.
    """
    assert len(df) == 100

def test_bin_rows_from_file():
    """Check that records imported from the JSON fixture yield 100 rows for 100 bins.

    Exercises ``MeasurementManager.import_files`` (instead of
    ``import_records``) and then runs the same chr1:1000000-1002000 query.
    """
    # Resolve the fixture next to this test module so the test does not
    # depend on the directory pytest happens to be launched from.
    fixture_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "test_json.json"
    )
    mMgr = MeasurementManager()
    fms = mMgr.import_files(fixture_path, genome="hg19")
    res = asyncio.run(fms[0].get_data("chr1", 1000000, 1002000, bins=100))
    assert len(res[0]) == 100

def test_mean():
    """Check the mean of the E079-H3K27me3 column against its reference value.

    Reuses the module-level ``df`` instead of re-importing the records and
    repeating the identical remote query.
    """
    import math

    # TODO(review): maintainers asked for this to become a computed mean
    # across both measurements (a computed measurement using numpy.mean).
    mean = df['E079-H3K27me3'].mean()
    # Compare with a tolerance: exact equality on a floating-point mean can
    # break on harmless differences in summation order or library version.
    assert math.isclose(mean, 1.6159404346346855, rel_tol=1e-9)
Comment on lines +57 to +63
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be a computed mean across two measurements, i.e., define a computed measurement with numpy.mean as the function.

1 change: 1 addition & 0 deletions tests/test_json.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"url": "https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E079-H3K27me3.fc.signal.bigwig", "file_type": "bigwig", "datatype": "bp", "name": "E079-H3K27me3", "id": "E079-H3K27me3", "genome": "hg19", "annotation": {"group": "digestive", "tissue": "Esophagus", "marker": "H3K27me3"}}, {"url": "https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E106-H3K27me3.fc.signal.bigwig", "file_type": "bigwig", "datatype": "bp", "name": "E106-H3K27me3", "id": "E106-H3K27me3", "genome": "hg19", "annotation": {"group": "digestive", "tissue": "Sigmoid Colon", "marker": "H3K27me3"}}]