#!/bin/bash
#
# Merge the per-shard partial VCFs found in the current directory into one
# genomically sorted VCF, then recompute per-call proximity features over the
# merged (global) call set.
#
# Inputs:   every *.vcf in the current directory, except this script's own
#           output/intermediate files (so a re-run does not merge old results).
# Output:   2026-0077_HG00733_HG00732_FF_1_25.vcf
# Requires: bash >= 4 (mapfile), a sort supporting -V, awk, and
#           nipt_recompute_proximity.py on PATH.

# Strict mode is enabled here instead of on the shebang line: shebang options
# are dropped when the script is run as `bash script.sh`, and Linux hands
# everything after the interpreter to it as one single argument.
set -euo pipefail

readonly SAMPLE_PREFIX='2026-0077_HG00733_HG00732_FF_1_25'
readonly MERGED_VCF="${SAMPLE_PREFIX}.vcf"
# Intermediate name used by earlier revisions of this script; it is no longer
# written, but a stale copy must not be picked up as a shard.
readonly LEGACY_UNSORTED_VCF="${SAMPLE_PREFIX}.unsorted.vcf"
readonly RECOMPUTED_VCF="${SAMPLE_PREFIX}.recomputed.vcf"

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Remove the temporary recompute output on any exit path.
cleanup() {
  rm -f -- "$RECOMPUTED_VCF"
}

# Print the shard VCF names, one per line, in natural (version) order so that
# shard_0001 sorts before shard_0010. Prints nothing when there are no shards.
list_shards() {
  local candidate
  local -a found=()

  shopt -s nullglob
  for candidate in *.vcf; do
    case "$candidate" in
      "$MERGED_VCF" | "$LEGACY_UNSORTED_VCF" | "$RECOMPUTED_VCF") continue ;;
    esac
    found+=("$candidate")
  done
  shopt -u nullglob

  (( ${#found[@]} > 0 )) || return 0
  printf '%s\n' "${found[@]}" | sort -V
}

main() {
  local -a shards=()
  local shard

  command -v nipt_recompute_proximity.py > /dev/null \
    || die "nipt_recompute_proximity.py not found on PATH"

  mapfile -t shards < <(list_shards)
  (( ${#shards[@]} > 0 )) || die "no shard VCFs (*.vcf) found in ${PWD}"

  trap cleanup EXIT

  # Header comes from the first shard only.
  grep '^#' -- "${shards[0]}" > "$MERGED_VCF" \
    || die "could not read a VCF header from ${shards[0]}"

  # Data lines from all shards, sorted by chrom (natural order) then pos.
  # awk is used for filtering because, unlike `grep -v`, it exits 0 when a
  # shard has no data lines, so no blanket `|| true` is needed and genuine
  # sort/I/O failures still abort the script via pipefail.
  # LC_ALL=C keeps the ordering independent of the caller's locale.
  for shard in "${shards[@]}"; do
    awk '!/^#/' < "$shard"
  done | LC_ALL=C sort -k1,1V -k2,2n >> "$MERGED_VCF"

  # Recompute per-call proximity features over the global call set.
  # The result is written to a separate file and renamed afterwards so the
  # tool is never asked to overwrite the file it is reading.
  # NOTE(review): whether the tool itself is safe with --vcf == --output is
  # not visible from this script; this form is safe either way.
  nipt_recompute_proximity.py --vcf "$MERGED_VCF" --output "$RECOMPUTED_VCF"
  mv -f -- "$RECOMPUTED_VCF" "$MERGED_VCF"
}

main "$@"