#!/bin/bash
#
# Estimate the gender of a sample by counting hits of the k-mers listed in
# SRY_kmers.txt among the sequence lines of the first 10M reads of a gzipped
# FASTQ file.
#
# Outputs:
#   HCC1395_tumor_gender_estimation.txt  one CSV line: <sample>,<count>,<XY|XX>
#   versions.yml                         versions of the tools that were used
#
# Shell options are set here rather than on the shebang line so that they also
# apply when the script is started as `bash script.sh`:
#   -C  noclobber: refuse to overwrite an existing file with `>`
#   -e  exit on an unhandled non-zero status
#   -u  treat unset variables as errors
#   -o pipefail  a pipeline fails if any of its stages fails
set -Ceuo pipefail

readonly SAMPLE_ID="HCC1395_tumor"
readonly FASTQ="HCC1395_tumor_R1.fastq.gz"
readonly KMER_FILE="SRY_kmers.txt"
# A FASTQ record is 4 lines, so 10M reads correspond to 40M lines.
readonly MAX_LINES=$((10000000 * 4))
# Counts strictly above this threshold are called XY, everything else XX.
readonly XY_THRESHOLD=5

#######################################
# Run rg, treating exit status 1 (no matches) as success while still
# propagating real errors (exit status 2 and above).
# Arguments: passed through to rg unchanged
# Outputs:   whatever rg writes to stdout/stderr
# Returns:   0 if rg exited with 0 or 1, otherwise rg's exit status
#######################################
rg_allow_empty() {
  local status=0
  # Capture the status via `||` so that errexit cannot abort the shell
  # before the "no matches" case has been mapped to success.
  rg "$@" || status=$?
  if ((status == 1)); then
    return 0
  fi
  return "$status"
}

# Count k-mer hits in the first 10M reads. Sequence lines are every 4th line
# starting from line 2; `rg -o` prints one line per match, so `wc -l` yields
# the total number of matches.
COUNT=$(
  rapidgzip -cd -P 4 --ranges "${MAX_LINES}L@0" "$FASTQ" \
    | sed -n '2~4p' \
    | rg_allow_empty -oF -f "$KMER_FILE" \
    | wc -l
)

# Determine gender based on the count threshold (>5 = XY, <=5 = XX).
if [[ "$COUNT" -gt "$XY_THRESHOLD" ]]; then
  GENDER="XY"
else
  GENDER="XX"
fi

printf '%s,%s,%s\n' "$SAMPLE_ID" "$COUNT" "$GENDER" > "${SAMPLE_ID}_gender_estimation.txt"

# Record tool versions. The here-document body and its terminator are kept at
# column 0 on purpose: `<<-` strips leading tabs only, never spaces.
cat <<-END_VERSIONS > versions.yml
"DAQ:ESTIMATEGENDER":
    estimategender: "1.0.1"
    rapidgzip: $(rapidgzip --version 2>&1 | head -1 | sed 's/rapidgzip, //')
    ripgrep: $(rg --version | head -1 | sed 's/ripgrep //')
END_VERSIONS