Skip to content

Commit

Permalink
organized prep_LINE1_db.sh related scripts and the result
Browse files (browse the repository at this point in the history)
  • Loading branch information
friend1ws committed Apr 15, 2024
1 parent e75ee1f commit fa77f34
Show file tree
Hide file tree
Showing 12 changed files with 19 additions and 15 deletions.
Binary file added resource/LINE1_db/LINE1.chm13v2.0.bed.gz
Binary file not shown.
Binary file added resource/LINE1_db/LINE1.chm13v2.0.bed.gz.tbi
Binary file not shown.
Binary file added resource/LINE1_db/LINE1.hg19.bed.gz
Binary file not shown.
Binary file added resource/LINE1_db/LINE1.hg19.bed.gz.tbi
Binary file not shown.
Binary file added resource/LINE1_db/LINE1.hg38.bed.gz
Binary file not shown.
Binary file added resource/LINE1_db/LINE1.hg38.bed.gz.tbi
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,18 @@ then
aws s3 cp s3://human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed ./
fi

python proc_rmsk.py rmsk.txt.gz > rmsk.line1.hg38.bed
sort -k1,1 -k2,2n -k3,3n chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed | bgzip -c > chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed.gz

tabix -p bed chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed.gz

python subscript/proc_rmsk.py rmsk.txt.gz > rmsk.line1.hg38.bed

liftOver rmsk.line1.hg38.bed hg38ToHg19.over.chain.gz rmsk.line1.hg19.bed.tmp rmsk.line1.unmapped

python mod_label.py rmsk.line1.hg19.bed.tmp > rmsk.line1.hg19.bed
python subscript/mod_label.py rmsk.line1.hg19.bed.tmp > rmsk.line1.hg19.bed


python proc_rmsk_chm13.py chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed.gz > rmsk.line1.chm13v2.0.bed
python subscript/proc_rmsk_chm13.py chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed.gz > rmsk.line1.chm13v2.0.bed

#########

Expand All @@ -63,13 +67,13 @@ fi

bcftools filter -i 'INFO/SVLEN > 5800 && INFO/SVTYPE == "LINE1"' ALL.wgs.mergedSV.v8.20130502.svs.genotypes.vcf.gz | cut -f 1-8 > 1000genomes.line1.hg19.vcf

python proc_1000genomes.py 1000genomes.line1.hg19.vcf > 1000genomes.line1.hg19.bed
python subscript/proc_1000genomes.py 1000genomes.line1.hg19.vcf > 1000genomes.line1.hg19.bed

liftOver 1000genomes.line1.hg19.bed hg19ToHg38.over.chain.gz 1000genomes.line1.hg38.bed.tmp 1000genomes.line1.hg38.unmapped
liftOver 1000genomes.line1.hg19.bed hg19-chm13v2.over.chain.gz 1000genomes.line1.chm13v2.0.bed.tmp 1000genomes.line1.chm13v2.0.unmapped

python mod_label.py 1000genomes.line1.hg38.bed.tmp > 1000genomes.line1.hg38.bed
python mod_label.py 1000genomes.line1.chm13v2.0.bed.tmp > 1000genomes.line1.chm13v2.0.bed
python subscript/mod_label.py 1000genomes.line1.hg38.bed.tmp > 1000genomes.line1.hg38.bed
python subscript/mod_label.py 1000genomes.line1.chm13v2.0.bed.tmp > 1000genomes.line1.chm13v2.0.bed

##########

Expand All @@ -89,13 +93,13 @@ fi

bcftools filter -i 'ALT == "<INS:ME:LINE1>" && INFO/SVLEN >= 5800' gnomad_v2.1_sv.controls_only.sites.vcf.gz | cut -f 1-8 > gnomad.line1.hg19.vcf

python proc_gnomad.py gnomad.line1.hg19.vcf > gnomad.line1.hg19.bed
python subscript/proc_gnomad.py gnomad.line1.hg19.vcf > gnomad.line1.hg19.bed

liftOver gnomad.line1.hg19.bed hg19ToHg38.over.chain.gz gnomad.line1.hg38.bed.tmp gnomad.line1.hg38.unmapped
liftOver gnomad.line1.hg19.bed hg19-chm13v2.over.chain.gz gnomad.line1.chm13v2.0.bed.tmp gnomad.line1.chm13v2.0.unmapped

python mod_label.py gnomad.line1.hg38.bed.tmp > gnomad.line1.hg38.bed
python mod_label.py gnomad.line1.chm13v2.0.bed.tmp > gnomad.line1.chm13v2.0.bed
python subscript/mod_label.py gnomad.line1.hg38.bed.tmp > gnomad.line1.hg38.bed
python subscript/mod_label.py gnomad.line1.chm13v2.0.bed.tmp > gnomad.line1.chm13v2.0.bed

##########

Expand All @@ -120,14 +124,14 @@ bgzip -c LINE1.chm13v2.0.bed > LINE1.chm13v2.0.bed.gz
tabix -p bed LINE1.chm13v2.0.bed.gz


mv LINE1.hg38.bed.gz ../nanomonsv/data/
mv LINE1.hg38.bed.gz.tbi ../nanomonsv/data/
# mv LINE1.hg38.bed.gz ../nanomonsv/data/
# mv LINE1.hg38.bed.gz.tbi ../nanomonsv/data/

mv LINE1.hg19.bed.gz ../nanomonsv/data/
mv LINE1.hg19.bed.gz.tbi ../nanomonsv/data/
# mv LINE1.hg19.bed.gz ../nanomonsv/data/
# mv LINE1.hg19.bed.gz.tbi ../nanomonsv/data/

mv LINE1.chm13v2.0.bed.gz ../nanomonsv/data/
mv LINE1.chm13v2.0.bed.gz.tbi ../nanomonsv/data/
# mv LINE1.chm13v2.0.bed.gz ../nanomonsv/data/
# mv LINE1.chm13v2.0.bed.gz.tbi ../nanomonsv/data/

##########

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit fa77f34

Please sign in to comment.