#!/bin/bash
#
# Merge per-shard partial VCFs found in the current directory into a single
# VCF, sort its records by chromosome and position, then run the proximity
# recomputation tool on the merged file.
#
# Inputs:  every *.vcf in the current directory except the output file.
# Output:  2026-0077_HG00733_HG00732_FF_1_25.vcf (header from the first shard
#          in natural-sort order, records from all shards).
# Exit:    non-zero if there are no input shards, the first shard has no
#          header, or any read/sort/recompute step fails.

# Strict mode is set here rather than in the shebang: shebang options are
# dropped when the script is started as `bash script.sh`, and the kernel
# passes everything after the interpreter as a single argument.
set -euo pipefail

readonly OUT_VCF="2026-0077_HG00733_HG00732_FF_1_25.vcf"

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Collect candidate shards with a glob instead of parsing `ls`, so that file
# names containing whitespace or glob characters survive intact.
shopt -s nullglob
candidates=(*.vcf)
shopt -u nullglob

# Order shards by label with a natural sort (shard_0001 < shard_0010) and drop
# the output file itself, which also matches *.vcf when the script is re-run
# in the same directory.
shard_vcfs=()
if (( ${#candidates[@]} > 0 )); then
  while IFS= read -r vcf; do
    [[ "$vcf" == "$OUT_VCF" ]] || shard_vcfs+=("$vcf")
  done < <(printf '%s\n' "${candidates[@]}" | sort -V)
fi
(( ${#shard_vcfs[@]} > 0 )) || die "no partial VCFs (*.vcf) found in ${PWD}"

# Intermediates live in a scratch directory (not matched by *.vcf) that is
# removed on every exit path; it is created in the working directory so the
# final `mv` stays on one filesystem.
scratch_dir=$(mktemp -d ./merge_vcf.XXXXXX)
trap 'rm -rf -- "$scratch_dir"' EXIT
records="${scratch_dir}/records.unsorted"
merged="${scratch_dir}/merged.sorted"

# Header comes from the first shard only.
grep '^#' -- "${shard_vcfs[0]}" > "$merged" \
  || die "no header lines found in ${shard_vcfs[0]}"

# Data lines from every shard, in shard order. grep exits 1 when a shard has
# no data lines (header-only shard), which is fine; anything higher is a real
# error and must not be swallowed.
: > "$records"
for vcf in "${shard_vcfs[@]}"; do
  rc=0
  grep -v '^#' -- "$vcf" >> "$records" || rc=$?
  (( rc <= 1 )) || die "failed to read records from ${vcf} (grep exit ${rc})"
done

# Sort the body by chrom (version sort) then pos (numeric) so the merged VCF
# is genomically sorted. VCF is tab-delimited, so the separator is explicit.
# A sort failure aborts the script instead of leaving a header-only output.
sort -t $'\t' -k1,1V -k2,2n -- "$records" >> "$merged"

# Publish the merged VCF only once it is complete.
mv -- "$merged" "$OUT_VCF"

# Recompute per-call proximity features over the global call set.
# NOTE(review): input and output are the same path; this is only safe if the
# tool finishes reading the VCF before it opens the output for writing --
# confirm against nipt_recompute_proximity.py.
nipt_recompute_proximity.py --vcf "$OUT_VCF" --output "$OUT_VCF"