File Info

Filename
.command.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/eb/559f355f8e9c631ae93f4c61efb866/.command.sh
Size
880 bytes
Attempt
#!/bin/bash -Ceuo pipefail
# Run ripgrep, treating "no matches" as success.
#
# Arguments: passed through to rg unchanged.
# Outputs:   whatever rg writes to stdout/stderr.
# Returns:   0 when rg exits 0 (matches found) or 1 (no matches);
#            otherwise rg's own exit status (2+ signals a real error).
rg_allow_empty() {
    local status=0
    # Capture the status through `||` so that an active errexit (set -e)
    # cannot abort the function before exit 1 is translated to success.
    rg "$@" || status=$?
    if [ "$status" -eq 1 ]; then
        return 0
    fi
    return "$status"
}

# Tally hits of the k-mers listed in SRY_kmers.txt over the first 40,000,000
# decompressed lines of R1. A FASTQ record spans 4 lines, so that is 10M reads;
# sed keeps only the sequence line of each record (lines 2, 6, 10, ...).
# rg prints each fixed-string hit on its own line (-o), so the line count is
# the hit count.
COUNT=$(
    rapidgzip -cd -P 4 --ranges 40000000L@0 "Filler-D0002474_R1.fastq.gz" \
        | sed -n '2~4p' \
        | rg_allow_empty -oF -f SRY_kmers.txt \
        | wc -l
)

# Call the sample XX unless more than 5 hits were seen, in which case call XY.
GENDER="XX"
if [ "$COUNT" -gt 5 ]; then
    GENDER="XY"
fi

# One CSV row: sample id, hit count, call.
printf '%s\n' "Filler-D0002474,$COUNT,$GENDER" > Filler-D0002474_gender_estimation.txt

# Write versions.yml: the process name as top-level key, followed by the
# module version and the first line of each tool's --version output with the
# tool-name prefix stripped. A single redirect on the group opens the file once.
{
    printf '%s\n' '"DAQ:ESTIMATEGENDER":'
    printf '%s\n' '    estimategender: "1.0.1"'
    printf '    rapidgzip: %s\n' "$(rapidgzip --version 2>&1 | head -1 | sed 's/rapidgzip, //')"
    printf '    ripgrep: %s\n' "$(rg --version | head -1 | sed 's/ripgrep //')"
} > versions.yml