#!/bin/bash -Ceuo pipefail
#
# Merge force-called variants into the Haplotyper call set.
#
# Reads:
#   germline_control_2.forcecall.unfiltered.vcf.gz   force-called records
#   germline_control_2.haplotyper.unfiltered.vcf.gz  original call set
# Writes:
#   fc_tagged.vcf.gz (+ .tbi)                            tagged force-called copy
#   germline_control_2.haplotyper.merged.vcf.gz (+ .tbi) merged, sorted result
#   versions.yml                                         bcftools version record
#
# Strict mode is repeated with `set` because flags given on the shebang line
# are dropped when the script is started as `bash <script>`.
# Note: -C (noclobber) makes the `>` redirections below fail if their target
# file already exists.
set -Ceuo pipefail

# Step 1: add the INFO/FORCE_CALLED flag to every force-called record and
# declare that flag in the header, immediately before the #CHROM line.
zcat germline_control_2.forcecall.unfiltered.vcf.gz \
  | awk '
    # VCF columns are tab-separated. Split on tabs only, so that a space
    # inside a column cannot shift the field numbering when the record is
    # rebuilt after $8 is assigned.
    BEGIN { FS = OFS = "\t" }

    # Meta-information lines pass through untouched.
    /^##/ { print; next }

    # Emit the new INFO declaration, then the column header line itself.
    /^#CHROM/ {
      print "##INFO=<ID=FORCE_CALLED,Number=0,Type=Flag,Description=\"Variant was force-called from DeepVariant rescue\">"
      print
      next
    }

    # Data record: column 8 is INFO. A lone "." (empty INFO) is replaced by
    # the flag; otherwise the flag is appended after a ";" separator.
    {
      $8 = ($8 == ".") ? "FORCE_CALLED" : ($8 ";FORCE_CALLED")
      print
    }
  ' \
  | bgzip > fc_tagged.vcf.gz
tabix -p vcf fc_tagged.vcf.gz

# Step 2: combine the original calls with the tagged force-called records
# (-a lets the two inputs overlap), stream uncompressed BCF into the sorter,
# and write a compressed, indexed VCF.
# NOTE(review): assumes an index for the Haplotyper VCF is already present
# next to it -- confirm against the caller that stages the inputs.
bcftools concat \
    -a germline_control_2.haplotyper.unfiltered.vcf.gz fc_tagged.vcf.gz \
    -Ou \
  | bcftools sort \
    -Oz -o germline_control_2.haplotyper.merged.vcf.gz
tabix -p vcf germline_control_2.haplotyper.merged.vcf.gz

# Step 3: record the bcftools version. The tool line is indented with spaces
# (which `<<-` does not strip) so that it nests under the process key in the
# resulting YAML instead of becoming a second top-level key.
cat <<-END_VERSIONS > versions.yml
"DAQ:CONTROL_VARIANT_CALLING:CONTROL_GERMLINE_VC:BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER_RF:MERGE_FORCECALLED":
    bcftools: $(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*$//')
END_VERSIONS