File Info

Filename
.command.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/84/c97988f0456db4ddc8142bfe3c5607/.command.sh
Size
955 bytes
Attempt
#!/bin/bash
# Merge the per-shard partial VCFs found in the current directory into one
# genomically sorted VCF, then recompute proximity features on the merged file.
#
# Inputs:  every *.vcf in the working directory except this script's own
#          output and scratch files.
# Output:  2026-0072_HG00733_HG00732_FF_40.vcf

# Strict mode is set here rather than on the shebang line so that it also
# applies when the script is started as `bash .command.sh`.
set -euo pipefail

readonly OUT_VCF='2026-0072_HG00733_HG00732_FF_40.vcf'
readonly UNSORTED_VCF='2026-0072_HG00733_HG00732_FF_40.unsorted.vcf'

# Print a message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the non-header lines of the VCF given as $1.
# grep exits 1 when it selects nothing, which is fine for a shard without
# records; any higher status is a real error and is passed on to the caller.
data_lines() {
  local status=0
  grep -v '^#' -- "$1" || status=$?
  if (( status > 1 )); then
    return "$status"
  fi
}

# The scratch file must not outlive the script, whatever the exit path.
trap 'rm -f -- "$UNSORTED_VCF"' EXIT

# Collect the partial VCFs with a glob instead of parsing `ls`.
shopt -s nullglob
shards=()
for vcf in *.vcf; do
  # Both generated names match *.vcf; skipping them keeps a re-run in the same
  # directory from merging its own earlier output back in.
  if [[ "$vcf" != "$OUT_VCF" && "$vcf" != "$UNSORTED_VCF" ]]; then
    shards+=("$vcf")
  fi
done
shopt -u nullglob
(( ${#shards[@]} > 0 )) || die "no partial VCFs (*.vcf) found in ${PWD}"

# Sort partial VCFs by shard label (natural sort handles shard_0001 < shard_0010).
sorted_shards=()
while IFS= read -r vcf; do
  sorted_shards+=("$vcf")
done < <(printf '%s\n' "${shards[@]}" | LC_ALL=C sort -V)
(( ${#sorted_shards[@]} == ${#shards[@]} )) || die "could not order the partial VCFs"

# Take the header from the first VCF in shard order.
grep '^#' -- "${sorted_shards[0]}" > "$OUT_VCF" \
  || die "no header lines could be read from ${sorted_shards[0]}"

# Gather the data lines of every shard, in shard order.
: > "$UNSORTED_VCF"
for vcf in "${sorted_shards[@]}"; do
  data_lines "$vcf" >> "$UNSORTED_VCF"
done

# Sort the body by chrom + pos so the merged VCF is genomically sorted.
# Fields are split on tabs explicitly, and LC_ALL=C makes the tie-breaking
# between records with equal keys independent of the caller's locale.
LC_ALL=C sort -t $'\t' -k1,1V -k2,2n -- "$UNSORTED_VCF" >> "$OUT_VCF"

# Recompute per-call proximity features over the global call set.
# NOTE(review): --vcf and --output are the same path; this relies on the tool
# reading its input completely before it writes the output -- confirm.
nipt_recompute_proximity.py --vcf "$OUT_VCF" --output "$OUT_VCF"