Skip to content

Commit 5482f44

Browse files
authored
Merge pull request #273 from PNNL-CompBio/211-missing-pyprojecttoml
211 missing pyprojecttoml
2 parents e2a91ed + de26c50 commit 5482f44

File tree

10 files changed

+119
-117
lines changed

10 files changed

+119
-117
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ tests/__pycache__
1818
dist
1919
build/lib
2020
build/local
21+
coderdata/_version.py
2122
local/

coderdata/__init__.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,9 @@
66
train_test_validate
77
)
88

9-
# '_version.py' will be generated by hatchling once the switch away from
10-
# setuptools.py is finished
11-
try:
12-
from ._version import __version__
13-
except ImportError:
14-
__version__ = '0.1.40'
15-
try:
16-
from ._version import __version_tuple__
17-
except ImportError:
18-
__version_tuple__ = (0, 1, 40)
9+
from ._version import __version__
10+
from ._version import __version_tuple__
11+
1912

2013
from .utils.utils import version
2114
from .utils.utils import list_datasets

coderdata/builder/__init__.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

coderdata/datasets.yml renamed to coderdata/dataset.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
figshare: "https://api.figshare.com/v2/articles/26409316"
2+
version: "v0.1.4"
13
datasets:
24
beataml:
35
description: "Beat acute myeloid leukemia (BeatAML) focuses on acute myeloid leukemia tumor data. Data includes drug response, proteomics, and transcriptomics datasets."
@@ -7,4 +9,4 @@ datasets:
79
hcmi:
810
description: "Human Cancer Models Initiative (HCMI) encompasses numerous cancer types and includes cell line, organoid, and tumor data. Data includes the transcriptomics, somatic mutation, and copy number datasets."
911
mpnst:
10-
description: "Malignant Peripheral Nerve Sheath Tumor is a rare, agressive sarcoma that affects peripheral nerves throughout the body."
12+
description: "Malignant Peripheral Nerve Sheath Tumor is a rare, aggressive sarcoma that affects peripheral nerves throughout the body."

coderdata/dataset/dataset.py

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import pickle
1212
import sys
1313
from typing import Literal
14+
from typing import Optional
15+
from typing import Union
1416

1517
import numpy as np
1618
from numpy.random import RandomState
@@ -335,8 +337,8 @@ def train_test_validate(
335337
'mixed-set', 'drug-blind', 'cancer-blind'
336338
]='mixed-set',
337339
ratio: tuple[int, int, int]=(8,1,1),
338-
stratify_by: (str | None)=None,
339-
random_state: (int | RandomState | None)=None,
340+
stratify_by: Optional[str]=None,
341+
random_state: Optional[Union[int,RandomState]]=None,
340342
**kwargs: dict,
341343
) -> Split:
342344

@@ -386,7 +388,7 @@ def save(self, path: Path) -> None:
386388

387389
def load(
388390
name: str,
389-
directory: str|Path=Path.cwd(),
391+
local_path: Union[str,Path]=Path.cwd(),
390392
from_pickle:bool=False
391393
) -> Dataset:
392394
"""
@@ -411,50 +413,62 @@ def load(
411413
TypeError
412414
_description_
413415
"""
414-
print("Processing Data...", file=sys.stderr)
415416

416-
if type(directory) is not Path:
417+
if type(local_path) is not Path:
417418
try:
418-
directory = Path(directory)
419-
if not directory.exists():
419+
local_path = Path(local_path)
420+
if not local_path.exists():
420421
raise OSError(
421-
f"Given path / directory does not exist: '{directory}'"
422+
f"Given path / directory does not exist: '{local_path}'"
422423
)
423424
except TypeError:
424425
raise TypeError(
425-
f"Invalid path / directory defined: '{directory}'"
426+
f"Invalid path / directory defined: '{local_path}'"
426427
)
427428

428429

429430
if not from_pickle:
430431
dataset = Dataset(name)
431432
accepted_file_endings = ('.csv', '.tsv', '.csv.gz', '.tsv.gz')
432-
for child in directory.iterdir():
433+
print(f"Importing raw data ...", file=sys.stderr)
434+
for child in local_path.iterdir():
433435
if child.name in ["genes.csv", "genes.csv.gz"]:
436+
print(
437+
f"Importing 'genes' from {child} ...",
438+
end=' ',
439+
file=sys.stderr
440+
)
434441
dataset.genes = _load_file(child)
435-
print("Loaded genes dataset.", file=sys.stderr)
442+
print("DONE", file=sys.stderr)
436443

437444
if (
438445
child.name.startswith(name)
439446
and child.name.endswith(accepted_file_endings)
440447
):
441448

442449
dataset_type = child.name[len(name)+1:].split('.')[0]
443-
print(dataset_type)
450+
print(
451+
f"Importing '{dataset_type}' from {child} ...",
452+
end=' ',
453+
file=sys.stderr
454+
)
444455
if hasattr(dataset, dataset_type):
445456
setattr(dataset, dataset_type, _load_file(child))
446-
457+
print("DONE", file=sys.stderr)
458+
print(f"Importing raw data ... DONE", file=sys.stderr)
447459
return dataset
448460

449461
else:
450462
accepted_file_endings = ('.pkl', '.pickle')
451-
for child in directory.iterdir():
463+
for child in local_path.iterdir():
452464
if (
453465
child.name.startswith(name)
454466
and child.name.endswith(accepted_file_endings)
455467
):
468+
print(f"Importing pickled data ...", end=' ', file=sys.stderr)
456469
with open(child, 'rb') as file:
457470
dataset = pickle.load(file=file)
471+
print("DONE", file=sys.stderr)
458472
return dataset
459473

460474

@@ -657,8 +671,8 @@ def train_test_validate(
657671
'mixed-set', 'drug-blind', 'cancer-blind'
658672
]='mixed-set',
659673
ratio: tuple[int, int, int]=(8,1,1),
660-
stratify_by: (str | None)=None,
661-
random_state: (int | RandomState | None)=None,
674+
stratify_by: Optional[str]=None,
675+
random_state: Optional[Union[int,RandomState]]=None,
662676
**kwargs: dict,
663677
) -> Split:
664678
"""
@@ -1003,8 +1017,7 @@ def _load_file(file_path: Path) -> pd.DataFrame:
10031017
)
10041018

10051019

1006-
def _determine_delimiter(file_path):
1007-
print(file_path.suffixes)
1020+
def _determine_delimiter(file_path: Path) -> str:
10081021
if '.tsv' in file_path.suffixes:
10091022
return '\t'
10101023
else:

coderdata/download/downloader.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
# coderdata/download/downloader.py
22

3+
from importlib import resources
34
from pathlib import Path
45
from os import PathLike
56
import os
67
import requests
78
import warnings
89

10+
import yaml
11+
912
def download(
1013
name: str=None,
1114
local_path: PathLike=Path.cwd(),
@@ -44,7 +47,9 @@ def download(
4447
if not local_path.exists():
4548
Path.mkdir(local_path)
4649
# Get the dataset details
47-
url = "https://api.figshare.com/v2/articles/25033697"
50+
with resources.open_text('coderdata', 'dataset.yml') as f:
51+
data_information = yaml.load(f, Loader=yaml.FullLoader)
52+
url = data_information['figshare']
4853

4954
response = requests.get(url)
5055
if response.status_code != 200:

coderdata/download/figshare_latest.yml

Lines changed: 0 additions & 49 deletions
This file was deleted.

coderdata/utils/utils.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from importlib import resources
66
import yaml
77

8+
from typing import Union
9+
810
from .. import __version__
911
from .. import __version_tuple__
1012

@@ -19,13 +21,18 @@ def version() -> dict:
1921
dict
2022
Contains package and dataset build version.
2123
"""
24+
with resources.open_text('coderdata', 'dataset.yml') as f:
25+
data_information = yaml.load(f, Loader=yaml.FullLoader)
2226
return {
2327
'package' : __version__,
24-
'dataset' : f"{__version_tuple__[0]}.{__version_tuple__[1]}"
28+
# getting the dataset version from 'dataset.yml'
29+
'dataset' : data_information['version'],
30+
# extrapolating the dataset version from the api version number
31+
# 'dataset' : f"{__version_tuple__[0]}.{__version_tuple__[1]}"
2532
}
2633

2734

28-
def list_datasets(raw: bool=False) -> dict | None:
35+
def list_datasets(raw: bool=False) -> Union[dict, None]:
2936
"""
3037
Helper function that returns a list of available datasets including
3138
a short description and additional information available.
@@ -43,11 +50,11 @@ def list_datasets(raw: bool=False) -> dict | None:
4350
Returns a dict containing the information if ``raw==True``,
4451
otherwise prints information to stdout and returns `None`.
4552
"""
46-
with resources.open_text('coderdata', 'datasets.yml') as f:
47-
datasets = yaml.load(f, Loader=yaml.FullLoader)
53+
with resources.open_text('coderdata', 'dataset.yml') as f:
54+
data_information = yaml.load(f, Loader=yaml.FullLoader)
4855
if raw:
49-
return datasets
56+
return data_information['datasets']
5057
else:
51-
datasets = datasets['datasets']
58+
datasets = data_information['datasets']
5259
for dataset in datasets:
53-
print(f'{dataset}: "{datasets[dataset]['description']}"')
60+
print(f'{dataset}: {datasets[dataset]["description"]}')

pyproject.toml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
[build-system]
2+
requires = [
3+
"hatchling",
4+
"hatch-vcs",
5+
]
6+
build-backend = "hatchling.build"
7+
8+
[project]
9+
name = "coderdata"
10+
description = "A package to download, load, and process multiple benchmark multi-omic drug response datasets"
11+
12+
requires-python = ">=3.9"
13+
authors = [
14+
{ name = "Jeremy Jacobson", email = "[email protected]" },
15+
{ name = "Yannick Mahlich", email = "[email protected]" },
16+
{ name = "Sara Gosline", email = "[email protected]"}
17+
]
18+
classifiers = [
19+
"License :: OSI Approved :: BSD License",
20+
"Operating System :: OS Independent",
21+
"Programming Language :: Python :: 3 :: Only",
22+
"Programming Language :: Python :: 3.9",
23+
"Programming Language :: Python :: 3.10",
24+
"Programming Language :: Python :: 3.11",
25+
"Programming Language :: Python :: 3.12",
26+
"Programming Language :: Python :: 3.13",
27+
"Topic :: Scientific/Engineering",
28+
"Topic :: Scientific/Engineering :: Bio-Informatics",
29+
]
30+
dependencies = [
31+
"numpy",
32+
"pandas",
33+
"requests",
34+
"scikit-learn",
35+
"pyyaml",
36+
]
37+
dynamic = [
38+
"version",
39+
]
40+
readme = "README.md"
41+
license = {text = "2-clause BSD"}
42+
43+
[project.scripts]
44+
coderdata = "coderdata.cli:main"
45+
46+
[project.urls]
47+
Homepage = "https://github.com/PNNL-CompBio/candleDataProcessing"
48+
Documentation = "https://pnnl-compbio.github.io/coderdata/"
49+
Repository = "https://github.com/PNNL-CompBio/coderdata.git"
50+
Issues = "https://github.com/PNNL-CompBio/coderdata/issues"
51+
52+
[tool.hatch.version]
53+
source = "vcs"
54+
55+
[tool.hatch.build.hooks.vcs]
56+
version-file = "coderdata/_version.py"
57+
58+
[tool.hatch.build.targets.sdist]
59+
include = [
60+
"/coderdata",
61+
]

setup.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

0 commit comments

Comments
 (0)