Skip to content

Commit

Permalink
baf
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Jul 18, 2024
1 parent a36721b commit 50c014b
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ all: ${TARGETS}
curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(shell uname)-$(shell uname -m).sh" && bash Mambaforge-$(shell uname)-$(shell uname -m).sh -b -p mamba && rm "Mambaforge-$(shell uname)-$(shell uname -m).sh" && touch .mamba

.tools: .mamba
export PATH="${PBASE}/mamba/bin:${PATH}" && mamba install -y -c conda-forge -c bioconda datamash samtools bcftools bedtools htslib delly alfred igv wally minimap2 nanoplot sniffles whatshap longshot && touch .tools
export PATH="${PBASE}/mamba/bin:${PATH}" && mamba install -y -c conda-forge -c bioconda datamash samtools bcftools bedtools htslib delly alfred igv wally minimap2 nanoplot sniffles whatshap longshot ont-modkit && touch .tools

.rstats: .mamba .tools
export PATH="${PBASE}/mamba/bin:${PATH}" && mamba install -y -c conda-forge -c bioconda bioconductor-genomicfeatures r-ggplot2 r-reshape2 r-gridextra r-cowplot bioconductor-dnacopy && touch .rstats
Expand Down
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,27 @@ As previously, once IGV has started use 'File' and 'Load from File' to load the
Let's annotate the small variants of the control genome in the matched tumor.

```bash
longshot --no_haps --potential_variants control.longshot.vcf.gz --min_cov 3 --min_alt_count 1 --min_alt_frac 0.01 -s HG008-T --bam tumor.bam --ref genome.fa --out tumor.longshot.vcf
longshot --no_haps --potential_variants control.longshot.vcf.gz --output-ref --min_cov 3 --min_alt_count 1 --min_alt_frac 0.1 -s HG008-T --bam tumor.bam --ref genome.fa --out tumor.longshot.vcf
bgzip tumor.longshot.vcf
tabix tumor.longshot.vcf.gz
```

Then we merge the tumor and control VCF files to annotate all heterozygous variants with their allelic depth in the tumor.

```bash
bcftools merge -O b -o tumor.control.bcf tumor.longshot.vcf.gz control.whatshap.vcf.gz
bcftools query -f "%CHROM\t%POS[\t%GT\t%AD]\n" tumor.control.bcf | grep "0|1" | cut -f 1,2,4 | sed 's/,/\t/' | awk '$3+$4>0 {print $1"\t"$2"\t"($3/($3+$4));}' > var.vaf
bcftools query -f "%CHROM\t%POS[\t%GT\t%AD]\n" tumor.control.bcf | grep "1|0" | cut -f 1,2,4 | sed 's/,/\t/' | awk '$3+$4>0 {print $1"\t"$2"\t"($4/($3+$4));}' >> var.vaf
head var.vaf
```

We can then overlay the variant allele frequencey (B-allele frequency) of small variants with the read-depth information.

```bash
Rscript cnBafSV.R cnv.cov.gz svs.tsv var.vaf
```

#### Exercises

* For the somatic duplication, we have an estimated total copy-number of 3. What are the expected B-allele frequencies in that region?

25 changes: 12 additions & 13 deletions scripts/cnBafSV.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
library(ggplot2)
library(scales)
library(gtable)
library(grid)
library(gridExtra)
library(DNAcopy)
library(cowplot)

args = commandArgs(trailingOnly=TRUE)
x = read.table(args[1], header=T)
Expand Down Expand Up @@ -44,17 +44,16 @@ if (length(args) == 2) { quit(); }

# Read-depth + SVs + BAF
baf = read.table(args[3], header=F)
colnames(baf) = c("pos", "baf")
q = ggplot(data=baf, aes(x=pos, y=baf))
q = q + geom_point(fill="black", colour="black", size=0.1, shape=21)
q = q + ylab("B-Allele Frequency") + xlab("chr2")
colnames(baf) = c("chr", "pos", "baf")
baf$chr = factor(baf$chr, levels=c("chr1", "chr5"))
q = ggplot(data=baf)
q = q + geom_jitter(aes(x=pos, y=baf), fill="red", colour="red", size=0.1, shape=21, width=0, height=0.05)
q = q + geom_jitter(aes(x=pos, y=1-baf), fill="blue", colour="blue", size=0.1, shape=21, width=0, height=0.05)
q = q + ylab("B-Allele Frequency") + xlab("Chromosome")
q = q + scale_x_continuous(labels=comma)
q = q + ylim(0,1)
q = q + scale_y_continuous(labels=comma, breaks=c(0, 0.5, 1), limits=c(0, 1))
q = q + theme(axis.text.x = element_text(angle=45, hjust=1))
p3 = p3 + xlim(min(baf$pos), max(baf$pos))
g1 = ggplotGrob(p3)
g2 = ggplotGrob(q)
g = rbind(g1, g2, size="first")
g$widths = unit.pmax(g1$widths, g2$widths)
ggsave(g, file="cov.png", width=10, height=7)
q = q + facet_wrap(~chr, scales="free_x")
plot_grid(q, p2, align="v", nrow=2, rel_heights=c(1/2, 1/2))
ggsave(file="cov.png", width=10, height=7)
print(warnings())

0 comments on commit 50c014b

Please sign in to comment.