#!/bin/bash -Ceuo pipefail
# Run ripgrep, treating exit status 1 ("no matches") as success.
# Arguments: all arguments are forwarded verbatim to rg.
# Returns:   0 when rg exits 0 or 1; otherwise rg's own exit status (2+),
#            so real errors still propagate to the caller.
rg_allow_empty() {
  local status=0
  # Capture the status through `||` so that a non-zero exit from rg cannot
  # trigger errexit (`set -e`) before the status has been inspected.
  rg "$@" || status=$?
  if [ "$status" -eq 1 ]; then
    return 0
  fi
  return "$status"
}
# Tally Y-specific k-mer hits in the first 40,000,000 lines of the FASTQ
# (10M four-line records); only the sequence line of each record
# (lines 2, 6, 10, ...) is searched. Each match is printed on its own line
# (-o), so the line count equals the number of hits.
COUNT=$(
  rapidgzip -cd -P 4 --ranges 40000000L@0 "Filler-D0000861_R1.fastq.gz" \
    | sed -n '2~4p' \
    | rg_allow_empty -oF -f SRY_kmers.txt \
    | wc -l
)
# Call XX by default; more than 5 hits switches the call to XY.
GENDER="XX"
if [ "$COUNT" -gt 5 ]; then
  GENDER="XY"
fi
# Emit a single CSV record: sample,count,call.
printf '%s\n' "Filler-D0000861,$COUNT,$GENDER" > Filler-D0000861_gender_estimation.txt
# Record the versions of the tools used into versions.yml.
# The tool entries are indented with spaces so that they nest under the
# process key as a YAML mapping; `<<-` strips leading tabs only, so the
# space indentation is preserved in the written file.
cat <<-END_VERSIONS > versions.yml
"DAQ:ESTIMATEGENDER":
    estimategender: "1.0.1"
    rapidgzip: $(rapidgzip --version 2>&1 | head -1 | sed 's/rapidgzip, //')
    ripgrep: $(rg --version | head -1 | sed 's/ripgrep //')
END_VERSIONS