File Info

Filename
.command.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/87/1035f1f7a14597ed43d144cf7053c3/.command.sh
Size
2.1 KB
Attempt
#!/bin/bash -Ceuo pipefail
# Re-assert the strict-mode flags from the interpreter line: options given on
# a shebang are not applied when the file is started as `bash <file>`.
#   -C  noclobber (a plain `>` refuses to overwrite an existing file)
#   -e  exit on the first unhandled failure
#   -u  treat unset variables as errors
#   -o pipefail  a pipeline fails if any of its stages fails
set -Ceuo pipefail

# Build the equal-length (SNV/MNV) call set from the filtered TNseq VCF.
# Pipeline stages, in order:
#   1. bcftools norm -m -both : split multiallelic records, keep the original
#      record in INFO/OLD_RECORD, and only warn (--check-ref w) on REF
#      mismatches against the reference FASTA.
#   2. bcftools view -i       : keep records whose REF and ALT have the same
#      string length.
#   3. awk                    : declare and add INFO/VARIANT_TYPE (SNV when REF
#      and ALT are both one character long, MNV otherwise) and flatten
#      genotypes with more than two alleles (see fix_gt below).
#   4. bcftools view -Oz      : write the stream as a compressed VCF.
bcftools norm \
    -m -both \
    --old-rec-tag OLD_RECORD \
    --check-ref w \
    -f Homo_sapiens_assembly38.fasta \
    HCC1395_tumor_vs_HCC1395_BL.tnseq.filtered.vcf.gz \
| bcftools view -i 'STRLEN(REF)==STRLEN(ALT)' \
| awk -F'\t' -v OFS='\t' '
    # Flatten a genotype that has more than two alleles to a two-allele call.
    #   sample  : one colon-separated per-sample column; subfield 1 is read as GT
    #   returns : the column unchanged when GT has at most two alleles, otherwise
    #             the column with GT replaced by
    #               0<sep>1  if any allele index is greater than 0,
    #               0<sep>0  if there is no such allele but at least one "0",
    #               .<sep>.  if neither is present,
    #             where <sep> is "|" when the GT contains one and "/" otherwise.
    # Names after the run of spaces in the parameter list are awk locals.
    function fix_gt(sample,    nf, fields, gt, sep, m, alleles, k, has_alt, has_ref, result) {
        nf = split(sample, fields, ":")
        gt = fields[1]
        sep = (index(gt, "|") > 0) ? "|" : "/"
        m = split(gt, alleles, "[/|]")
        if (m <= 2) return sample
        has_alt = 0; has_ref = 0
        for (k = 1; k <= m; k++) {
            if (alleles[k] != "." && alleles[k]+0 > 0) has_alt++
            else if (alleles[k] == "0") has_ref++
        }
        if (has_alt > 0) fields[1] = "0" sep "1"
        else if (has_ref > 0) fields[1] = "0" sep "0"
        else fields[1] = "." sep "."
        # Re-join the rewritten GT with the untouched remaining subfields.
        result = fields[1]
        for (k = 2; k <= nf; k++) result = result ":" fields[k]
        return result
    }
    BEGIN {
        vt_header = "##INFO=<ID=VARIANT_TYPE,Number=1,Type=String,Description=\"Variant type: SNV or MNV\">"
    }
    # Declare VARIANT_TYPE exactly once: ahead of the first ##INFO line, or,
    # when the header has no ##INFO line at all, right before the #CHROM line
    # so that the tag written on the records is never left undeclared.
    (/^##INFO/ || /^#CHROM/) && !h {
        print vt_header
        h = 1
    }
    # Header lines pass through unchanged.
    /^#/ { print; next }
    # Data records: column 4 is REF, 5 is ALT, 8 is INFO, 10 and up are samples.
    {
        if (length($4)==1 && length($5)==1) vt="SNV"
        else vt="MNV"
        # An INFO of "." is replaced outright; anything else gets the tag appended.
        $8 = ($8=="." ? "VARIANT_TYPE=" vt : $8 ";VARIANT_TYPE=" vt)
        for (s = 10; s <= NF; s++) $s = fix_gt($s)
        print
    }
' \
| bcftools view -Oz -o HCC1395_tumor_vs_HCC1395_BL.snv_indel.tnseq.snv.unsorted.vcf.gz

# Pin the sample columns to the canonical (tumor, normal) order so that a later
# concat never sees mismatched columns, then drop the intermediate file and
# build the tabix index for the final VCF.
unsorted_vcf=HCC1395_tumor_vs_HCC1395_BL.snv_indel.tnseq.snv.unsorted.vcf.gz
final_vcf=HCC1395_tumor_vs_HCC1395_BL.snv_indel.tnseq.snv.vcf.gz

# One sample name per line; printf reuses the format for each argument.
printf '%s\n' "HCC1395_tumor" "HCC1395_BL" > sample_order.txt
bcftools view \
    -S sample_order.txt \
    -Oz \
    -o "$final_vcf" \
    "$unsorted_vcf"
rm "$unsorted_vcf"

tabix -p vcf "$final_vcf"

# Write versions.yml: one top-level key (the quoted process path) holding the
# bcftools version, taken from the first line of `bcftools --version` with
# everything up to "bcftools " and everything after the next space removed.
# The here-doc body is emitted as-is; `<<-` strips leading tabs only, so the
# four-space indent of the "bcftools:" entry is kept in the output.
cat > versions.yml <<-END_VERSIONS
"NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:VCF_SOMATIC_SNV_INDEL:PREP_TNSEQ_SNV":
    bcftools: $(bcftools --version 2>&1 | head -n 1 | sed -e 's/^.*bcftools //' -e 's/ .*$//')
END_VERSIONS