Skip to content

Commit

Permalink
Merge pull request #18 from cancervariants/staging
Browse files Browse the repository at this point in the history
Staging
  • Loading branch information
korikuzma authored Feb 22, 2022
2 parents 8abd938 + 920fc61 commit f9126ee
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 35 deletions.
4 changes: 2 additions & 2 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ name = "pypi"
pydantic = "*"
requests = "*"
bravado = "*"
pandas = "*"
fastapi = "*"
boto3 = "*"
xlrd = "*"

[dev-packages]
evidence = {editable = true, path = "."}
Expand All @@ -26,5 +26,5 @@ ipykernel = "*"
variation-normalizer = "*"
click = "*"
openpyxl = "*"
xlrd = "*"
pandas = "*"
xlwt = "*"
55 changes: 36 additions & 19 deletions evidence/data_sources/cancer_hotspots.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from os import remove
import shutil
from pathlib import Path
from typing import Dict, Optional
from typing import Dict, Optional, List

import pandas as pd
import xlrd
import boto3
from botocore.config import Config

Expand Down Expand Up @@ -64,10 +64,11 @@ def __init__(
raise FileNotFoundError(
"Unable to retrieve path for normalized Cancer Hotspots data")

self.snv_hotspots = pd.read_excel(
self.normalized_data_path, sheet_name=self.og_snv_sheet_name)
self.indel_hotspots = pd.read_excel(
self.normalized_data_path, sheet_name=self.og_indel_sheet_name)
wb = xlrd.open_workbook(self.normalized_data_path)
self.snv_hotspots = wb.sheet_by_name(self.og_snv_sheet_name)
self.snv_headers = self.snv_hotspots.row_values(0)
self.indel_hotspots = wb.sheet_by_name(self.og_indel_sheet_name)
self.indel_headers = self.indel_hotspots.row_values(0)

def get_normalized_data_path(self) -> None:
"""Download latest normalized data from public s3 bucket if it does not already
Expand Down Expand Up @@ -116,26 +117,42 @@ def mutation_hotspots(self, so_id: str, vrs_variation_id: str) -> Response:
source_meta_=self.source_meta
)

@staticmethod
def get_row(sheet: xlrd.sheet.Sheet, vrs_identifier: str) -> Optional[List]:
    """Get row from xls sheet if vrs_identifier matches value in last column

    Row 0 is skipped (callers read it separately as the header row). The scan
    stops at the first data row whose last cell equals ``vrs_identifier``.

    :param xlrd.sheet.Sheet sheet: The sheet to use
    :param str vrs_identifier: The vrs_identifier to match on
    :return: Row represented as a list if vrs_identifier match was found, else None
    """
    for row_idx in range(1, sheet.nrows):
        candidate = sheet.row_values(row_idx)
        # A row may come back empty (e.g. a blank row in a workbook opened
        # with ragged_rows=True); guard so indexing the last cell cannot
        # raise IndexError.
        if candidate and candidate[-1] == vrs_identifier:
            return candidate
    return None

def query_snv_hotspots(self, vrs_variation_id: str) -> Optional[Dict]:
"""Return data for SNV
:param str vrs_variation_id: VRS digest for variation
:return: SNV data for vrs_variation_id
"""
df = self.snv_hotspots.loc[self.snv_hotspots["vrs_identifier"] == vrs_variation_id] # noqa: E501
if df.empty:
row = self.get_row(self.snv_hotspots, vrs_variation_id)
if not row:
return None

ref = df["ref"].item()
pos = df["Amino_Acid_Position"].item()
alt = df["Variant_Amino_Acid"].item()
ref = row[self.snv_headers.index("ref")]
pos = row[self.snv_headers.index("Amino_Acid_Position")]
alt = row[self.snv_headers.index("Variant_Amino_Acid")]
mutation, observations = alt.split(":")
return {
"codon": f"{ref}{pos}",
"mutation": f"{ref}{pos}{mutation}",
"q_value": df["qvalue"].item(),
"q_value": row[self.snv_headers.index("qvalue")],
"observations": int(observations),
"total_observations": int(df["Mutation_Count"].item())
"total_observations": int(row[self.snv_headers.index("Mutation_Count")])
}

def query_indel_hotspots(self, vrs_variation_id: str) -> Optional[Dict]:
Expand All @@ -144,17 +161,17 @@ def query_indel_hotspots(self, vrs_variation_id: str) -> Optional[Dict]:
:param str vrs_variation_id: VRS digest for variation
:return: INDEL data for vrs_variation_id
"""
df = self.indel_hotspots.loc[self.indel_hotspots["vrs_identifier"] == vrs_variation_id] # noqa: E501
if df.empty:
row = self.get_row(self.indel_hotspots, vrs_variation_id)
if not row:
return None

pos = df["Amino_Acid_Position"].item()
alt = df["Variant_Amino_Acid"].item()
pos = row[self.indel_headers.index("Amino_Acid_Position")]
alt = row[self.indel_headers.index("Variant_Amino_Acid")]
mutation, observations = alt.split(":")
return {
"codon": pos,
"mutation": mutation,
"q_value": df["qvalue"].item(),
"q_value": row[self.indel_headers.index("qvalue")],
"observations": int(observations),
"total_observations": int(df["Mutation_Count"].item())
"total_observations": int(row[self.indel_headers.index("Mutation_Count")])
}
2 changes: 1 addition & 1 deletion evidence/dev/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def add_vrs_identifier_to_data(ch: CancerHotspots) -> None:

today = datetime.strftime(datetime.today(), "%Y%m%d")
ch.normalized_data_path = \
ch.src_dir_path / f"normalized_hotspots_v{ch.source_meta.version}_{today}.xlsx" # noqa: E501
ch.src_dir_path / f"normalized_hotspots_v{ch.source_meta.version}_{today}.xls" # noqa: E501
with pd.ExcelWriter(ch.normalized_data_path) as writer:
snv_hotspots.to_excel(
writer, sheet_name=ch.og_snv_sheet_name, index=False)
Expand Down
2 changes: 1 addition & 1 deletion evidence/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Define evidence-normalizer version"""
__version__ = "0.0.3a1"
__version__ = "0.0.3a2"
10 changes: 4 additions & 6 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ beautifulsoup4==4.10.0; python_version >= '3.1'
biocommons.seqrepo==0.6.5; python_version >= '3.5'
bioutils==0.5.5; python_version >= '3.6'
black==22.1.0; python_full_version >= '3.6.2'
boto3==1.21.3
botocore==1.24.3; python_version >= '3.6'
boto3==1.21.4
botocore==1.24.4; python_version >= '3.6'
bravado-core==5.17.0; python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
bravado==11.0.3
bs4==0.0.1
Expand Down Expand Up @@ -59,7 +59,6 @@ ga4gh.vrs[extras]==0.7.2; python_version >= '3.6'
ga4gh.vrsatile.pydantic==0.0.9; python_version >= '3.8'
gene-normalizer==0.1.24; python_version >= '3.8'
gffutils==0.10.1
greenlet==1.1.2; python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))
h11==0.13.0; python_version >= '3.6'
hgvs==1.5.2
humanfriendly==10.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
Expand Down Expand Up @@ -114,7 +113,7 @@ pyflakes==2.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.
pygments==2.11.2; python_version >= '3.5'
pyliftover==0.4
pyparsing==3.0.7; python_version >= '3.6'
pyppeteer==1.0.2; python_version >= '3.7' and python_version < '4.0'
pyppeteer==1.0.2; python_version >= '3.7' and python_version < '4'
pyquery==1.4.3
pyrsistent==0.18.1; python_version >= '3.7'
pysam==0.18.0
Expand All @@ -136,7 +135,6 @@ six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3
sniffio==1.2.0; python_version >= '3.5'
snowballstemmer==2.2.0
soupsieve==2.3.1; python_version >= '3.6'
sqlalchemy==1.4.31; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
sqlparse==0.4.2; python_version >= '3.5'
stack-data==0.2.0
starlette==0.17.1; python_version >= '3.6'
Expand All @@ -152,7 +150,7 @@ typing-extensions==4.1.1; python_version >= '3.6'
uri-template==1.1.0
urllib3==1.26.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
uvicorn==0.17.5; python_version >= '3.7'
variation-normalizer==0.2.19
variation-normalizer==0.2.20
virtualenv==20.13.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
w3lib==1.22.0
wcwidth==0.2.5
Expand Down
7 changes: 3 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
anyio==3.5.0; python_full_version >= '3.6.2'
arrow==1.2.2; python_version >= '3.6'
attrs==21.4.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
boto3==1.21.3
botocore==1.24.3; python_version >= '3.6'
boto3==1.21.4
botocore==1.24.4; python_version >= '3.6'
bravado-core==5.17.0; python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
bravado==11.0.3
certifi==2021.10.8
Expand All @@ -25,8 +25,6 @@ jsonref==0.2
jsonschema[format]==4.4.0; python_version >= '3.7'
monotonic==1.6
msgpack==1.0.3
numpy==1.22.2; python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'
pandas==1.4.1
pydantic==1.9.0
pyrsistent==0.18.1; python_version >= '3.7'
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
Expand All @@ -45,3 +43,4 @@ typing-extensions==4.1.1; python_version >= '3.6'
uri-template==1.1.0
urllib3==1.26.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
webcolors==1.11.1
xlrd==2.0.1
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ install_requires =
pydantic
requests
bravado
pandas
xlrd
fastapi
boto3

Expand All @@ -40,8 +40,8 @@ dev =
variation-normalizer
click
openpyxl
xlrd
xlwt
pandas

[tool:pytest]
addopts = --ignore setup.py --doctest-modules --cov-report term-missing --cov .

0 comments on commit f9126ee

Please sign in to comment.