File Info

Filename
.command.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/b8/9562e24b479bf150628ab4a30030cc/.command.sh
Size
811 bytes
Attempt
#!/bin/bash
# Merge the per-shard partial VCFs found in the current directory into one
# genomically sorted VCF, then recompute per-call proximity features on it.
#
# Inputs:  every *.vcf in the working directory (one per shard).
# Output:  5pct-FF-HG00733.vcf
# Needs:   sort with -V support, nipt_recompute_proximity.py on PATH.

# Strict mode is enabled here rather than on the shebang line so that it is
# also in effect when the script is started as `bash .command.sh`.
set -euo pipefail

readonly OUT_VCF='5pct-FF-HG00733.vcf'
readonly RECOMPUTED_VCF='5pct-FF-HG00733.recomputed.vcf'

# Print a message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the record (non-header) lines of the VCF given as $1.
# grep exits 1 when a shard contains no records, which is acceptable; any
# higher status is a real failure (e.g. unreadable file) and is propagated
# instead of being swallowed.
vcf_records() {
  local status=0
  grep -v -- '^#' "$1" || status=$?
  (( status <= 1 )) || return "$status"
}

# Never leave the scratch file behind, whatever the exit path.
trap 'rm -f -- "$RECOMPUTED_VCF"' EXIT

# Collect the shard VCFs with a glob (no `ls` parsing). Files this script
# itself produces are skipped so that a re-run in the same directory does not
# merge a previous result back in as if it were a shard.
shopt -s nullglob
shard_vcfs=()
for vcf in *.vcf; do
  if [[ "$vcf" == "$OUT_VCF" || "$vcf" == "$RECOMPUTED_VCF" ]]; then
    printf 'warning: ignoring leftover output %s\n' "$vcf" >&2
    continue
  fi
  shard_vcfs+=("$vcf")
done
shopt -u nullglob
(( ${#shard_vcfs[@]} > 0 )) || die "no partial VCFs (*.vcf) found in ${PWD}"

# Order shards by label (natural sort handles shard_0001 < shard_0010).
# The assignment is kept separate from mapfile so a sort failure is fatal.
sorted_list=$(printf '%s\n' "${shard_vcfs[@]}" | sort -V)
mapfile -t shard_vcfs <<< "$sorted_list"

# Header comes from the first shard; records come from all shards and are
# sorted by chrom + pos so the merged VCF is genomically sorted. A failure in
# any stage (grep, sort) aborts the script via errexit/pipefail.
{
  grep -- '^#' "${shard_vcfs[0]}" || die "no header lines in ${shard_vcfs[0]}"
  for vcf in "${shard_vcfs[@]}"; do
    vcf_records "$vcf" || exit
  done | sort -k1,1V -k2,2n
} > "$OUT_VCF"

# Recompute per-call proximity features over the global call set.
# NOTE(review): whether the tool can safely read and write the same path is
# not visible from this script, so the result goes to a scratch file first
# and replaces the merged VCF only after the tool has exited successfully.
nipt_recompute_proximity.py --vcf "$OUT_VCF" --output "$RECOMPUTED_VCF"
mv -f -- "$RECOMPUTED_VCF" "$OUT_VCF"