Skip to content

Commit 692329d

Browse files
authored
Merge pull request #280 from PNNL-CompBio/panc_pdo
Addition of pancreaticPDO dataset.
2 parents bd584cc + cb3fe89 commit 692329d

22 files changed

+2427
-0
lines changed

.github/workflows/build.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,30 @@ jobs:
177177
push: true
178178
platforms: linux/amd64
179179

180+
build-pancpdo:
181+
runs-on: ubuntu-latest
182+
steps:
183+
- name: Checkout
184+
uses: actions/checkout@v3
185+
- name: Set up QEMU
186+
uses: docker/setup-qemu-action@v3
187+
- name: Set up Docker Buildx
188+
uses: docker/setup-buildx-action@v3
189+
- name: Login to DockerHub
190+
uses: docker/login-action@v3
191+
with:
192+
username: ${{ secrets.DOCKERHUB_USERNAME }}
193+
password: ${{ secrets.DOCKERHUB_PASSWORD }}
194+
- name: Build and push pancpdo
195+
uses: docker/build-push-action@v3
196+
with:
197+
file: ./build/docker/Dockerfile.pancpdo
198+
tags: |
199+
sgosline/pancpdo:latest
200+
sgosline/pancpdo:${{ github.ref_name }}
201+
push: true
202+
platforms: linux/amd64
203+
180204
build-upload:
181205
runs-on: ubuntu-latest
182206
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ tests/__pycache__
1818
dist
1919
build/lib
2020
build/local
21+
local/

build/build_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def process_docker(dataset,validate):
4242
'beataml': ['beataml'],
4343
'mpnst': ['mpnst'],
4444
'mpnstpdx': ['mpnstpdx'],
45+
'pancpdo': ['pancpdo'],
4546
'cptac': ['cptac'],
4647
'genes': ['genes'],
4748
'upload': ['upload']
@@ -123,6 +124,7 @@ def process_omics(executor, dataset, should_continue):
123124
'broad_sanger': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
124125
'cptac': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
125126
'hcmi': ['mutations', 'transcriptomics'],
127+
'pancpdo': ['transcriptomics'],
126128
'mpnstpdx':['copy_number', 'mutations', 'proteomics', 'transcriptomics']
127129
}
128130

build/docker/Dockerfile.pancpdo

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
FROM python:3.9
2+
3+
WORKDIR /usr/src/app
4+
5+
COPY build/pancpdo/01-createPancPDOSamplesFile.py .
6+
COPY build/pancpdo/02-getPancPDOData.py .
7+
COPY build/pancpdo/03-getPancPDODrugs.py .
8+
COPY build/pancpdo/04-getPancPDOExperiments.py .
9+
COPY build/pancpdo/05-addPrecalcAUC.py .
10+
COPY build/pancpdo/full_manifest.txt .
11+
COPY build/pancpdo/requirements.txt .
12+
COPY build/pancpdo/*sh ./
13+
COPY build/pancpdo/pancpdo_cancer_types.csv ./
14+
COPY build/utils/* ./
15+
16+
# Set MPLCONFIGDIR to a writable directory
17+
ENV MPLCONFIGDIR=/app/tmp/matplotlib
18+
RUN mkdir -p /app/tmp/matplotlib
19+
20+
RUN pip install --no-cache-dir -r requirements.txt
21+
# Declare /tmp as a volume. The JSON-array form must use double quotes;
# with single quotes Docker parses the line as shell form and creates a
# volume at the literal path "['/tmp']" instead of /tmp.
VOLUME ["/tmp"]

build/docker/docker-compose.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ services:
2626
HTTPS_PROXY: ${HTTPS_PROXY}
2727
platform: linux/amd64
2828
image: hcmi:latest
29+
pancpdo:
30+
build:
31+
context: ../../
32+
dockerfile: build/docker/Dockerfile.pancpdo
33+
args:
34+
HTTPS_PROXY: ${HTTPS_PROXY}
35+
platform: linux/amd64
36+
image: pancpdo:latest
2937

3038
beataml:
3139
build:

build/hcmi/02-getHCMIData.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,30 @@ def download_tool(url):
3636
# Download the file
3737
print("Downloading tool...")
3838
filename = wget.download(url)
39+
## Commented out due to a merge conflict
40+
# files_before = os.listdir()
41+
# # shutil.unpack_archive(filename)
42+
# ##there are two files to unpack
43+
# print('Unpacking platform-specific path')
44+
# shutil.unpack_archive(os.path.basename(url))
45+
# #This is just set for AWS to debug. This will have to be mapped to OS. They changed their file structure. This should be updated.
46+
# print('Unpacking secondary zip')
47+
# fnames={
48+
# 'Darwin':"gdc-client_2.3_OSX_x64.zip",
49+
# 'Linux':"gdc-client_2.3_Ubuntu_x64.zip",
50+
# 'Windows':"gdc-client_2.3_Windows_x64.zip"
51+
# }
52+
# shutil.unpack_archive(fnames[platform.system()])
53+
# #This is just set for AWS to debug. This will have to be mapped to OS. They changed their file structure. This should be updated.
54+
# shutil.unpack_archive("gdc-client_2.3_Ubuntu_x64.zip")
55+
# if not os.path.exists('gdc-client'):
56+
# raise FileNotFoundError("gdc-client executable not found after extraction.")
57+
# # Ensure 'gdc-client' is executable
58+
# st = os.stat('gdc-client')
59+
# os.chmod('gdc-client', st.st_mode | stat.S_IEXEC)
60+
# # Return the path to the executable
61+
# return './gdc-client'
62+
3963

4064
# First extraction
4165
print(f"\nExtracting {filename}...")
@@ -65,6 +89,7 @@ def download_tool(url):
6589

6690
return gdc_client_path
6791

92+
6893
def is_tool(name):
6994
"""
7095
Check if a specific tool is available on the system.

0 commit comments

Comments
 (0)