File Info

Filename
.command.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/d8/a3acee5b62f04bfc11684c3c180d8f/.command.sh
Size
1.6 KB
Attempt
#!/bin/bash -Ceuo pipefail
# Tally the PASS records of sample 'Sig_18_tissue', separately for SNVs and
# indels, dropping records for which FORMAT/AF[0:0] < 0.05, and start the
# metrics TSV with the two counts.

# count_pass_variants TYPE
#   TYPE is handed to `bcftools view -v` (snps or indels). Prints the number
#   of header-less records that remain after the sample/PASS/AF filters.
count_pass_variants() {
    bcftools view -s 'Sig_18_tissue' -f PASS positive_somatic_control_2.snv_indel.phased.vep.final.vcf.gz \
        | bcftools view -H -e 'FORMAT/AF[0:0]<0.05' -v "$1" \
        | wc -l
}

snv=$(count_pass_variants snps)
indel=$(count_pass_variants indels)

# The first write creates the file with '>'; because the shebang enables -C
# (noclobber) this fails if the TSV already exists. The second write appends.
printf "snv_count\t%s\n"   "$snv"    > positive_somatic_control_2.snv_indel.tsv
printf "indel_count\t%s\n" "$indel" >> positive_somatic_control_2.snv_indel.tsv

# normalize_and_index INPUT.vcf.gz OUTPUT.vcf.gz
#   Runs `bcftools norm -m -` (split multiallelic records) against
#   Homo_sapiens_assembly38.fasta, writes bgzipped OUTPUT, then builds a
#   tabix (.tbi) index for it.
normalize_and_index() {
    bcftools norm -m - -f Homo_sapiens_assembly38.fasta -Oz -o "$2" "$1"
    bcftools index -t "$2"
}

# Truth set: normalise the COA VCF.
normalize_and_index hd789_coa.vcf.gz coa.norm.vcf.gz

# Calls: keep only PASS records of sample 'Sig_18_tissue', then normalise the
# same way so both sides use the same allele representation.
bcftools view -s 'Sig_18_tissue' -f PASS -Oz -o calls.filt.vcf.gz positive_somatic_control_2.snv_indel.phased.vep.final.vcf.gz
normalize_and_index calls.filt.vcf.gz calls.norm.vcf.gz

# Intersect calls (first input) with the truth set (second input); the
# per-category VCFs are written under isec_out/.
bcftools isec -p isec_out -Oz calls.norm.vcf.gz coa.norm.vcf.gz

# Truth-set (COA) recovery metrics, derived from the `bcftools isec` output in
# isec_out/ (inputs were: 1st = calls.norm.vcf.gz, 2nd = coa.norm.vcf.gz):
#   0001.vcf.gz - records private to the 2nd input (truth variants not called)
#   0003.vcf.gz - records of the 2nd input shared with the 1st (truth recovered)
coa_total=$(bcftools view -H coa.norm.vcf.gz      | wc -l)
coa_recov=$(bcftools view -H isec_out/0003.vcf.gz | wc -l)

# One label per missed truth record: its ID when present, otherwise
# CHROM:POS:REF:ALT. ID and locus are separated by a tab because a tab can
# never occur inside a VCF field, whereas '|' is legal inside an ID and would
# truncate it. Labels are joined with ';' into a single line.
coa_missed=$(bcftools query -f '%ID\t%CHROM:%POS:%REF:%ALT\n' isec_out/0001.vcf.gz \
    | awk -F'\t' '{ if ($1 != "" && $1 != ".") print $1; else print $2 }' \
    | paste -sd';' -)

# Default to NONE when nothing was missed. The expansion is quoted so the
# joined labels (which may contain '*' from ALT alleles, or spaces) are not
# word-split or glob-expanded against the working directory.
: "${coa_missed:=NONE}"

# Append to the TSV that was created earlier in this script.
{
    printf "coa_truth_total\t%s\n"     "$coa_total"
    printf "coa_recovered_count\t%s\n" "$coa_recov"
    printf "coa_missed_variants\t%s\n" "$coa_missed"
} >> positive_somatic_control_2.snv_indel.tsv


# Write versions.yml: a single quoted top-level key (presumably the Nextflow
# process name - confirm against the pipeline) mapping `bcftools` to the second
# whitespace-separated field of the first line of `bcftools --version`.
# The file is created with '>', so under the shebang's -C (noclobber) this
# fails if versions.yml already exists. The heredoc delimiter is unquoted, so
# the $(...) below is expanded by the shell when the file is written.
cat <<-END_VERSIONS > versions.yml
"DAQ:CONTROL_METRICS:POSITIVE_SOMATIC_METRICS:POSITIVE_SOMATIC_SNV_INDEL":
    bcftools: $(bcftools --version | head -n1 | awk '{print $2}')
END_VERSIONS