#!/bin/bash
# run_ngscheckmate.sh - Run NGSCheckMate on a single BAM file
#
# This script generates a VCF file with genotype information at ~17,000 common
# SNP positions using NGSCheckMate. The VCF output is used for RNA contamination
# detection via VAF analysis.
#
# Usage: ./run_ngscheckmate.sh <input.bam> <output_dir>
#
# Arguments:
#   input.bam   - Path to input BAM file (written verbatim to the BAM list
#                 handed to the container)
#   output_dir  - Directory for output VCF file (created if missing)
#
# Environment (optional overrides for the host directories that get mounted):
#   REFS_DIR - mounted at /refs (default: /home/ubuntu/references)
#   RUNS_DIR - mounted at /runs (default: /home/ubuntu/runs_sync)
#   DATA_DIR - mounted at /data (default: /home/ubuntu/data)
#
# Requirements:
#   - Docker
#   - Reference genome mounted at /refs/ctat_genome_lib_build_dir/ref_genome.fa
#   - BAM file directory mounted appropriately
#
# Example:
#   ./run_ngscheckmate.sh /data/samples/sample1.bam /data/results/
#
# Exit status: 1 on bad usage; otherwise the script aborts with the status of
# the first failing command (mkdir, mktemp, docker run, ...).

set -euo pipefail

# Check arguments
if [[ "$#" -lt 2 ]]; then
  {
    echo "Usage: $0 <input.bam> <output_dir>"
    echo ""
    echo "Arguments:"
    echo " input.bam - Path to input BAM file"
    echo " output_dir - Directory for output VCF file"
  } >&2
  exit 1
fi

INPUT_BAM="$1"
OUTPUT_DIR="$2"
# Sample name is the BAM file name without its directory and ".bam" suffix.
SAMPLE_NAME=$(basename "$INPUT_BAM" .bam)

# Docker image
DOCKER_IMAGE="quay.io/biocontainers/ngscheckmate:1.0.0--py27r41hdfd78af_3"

# Default mount paths (customize these for your environment)
REFS_DIR="${REFS_DIR:-/home/ubuntu/references}"
RUNS_DIR="${RUNS_DIR:-/home/ubuntu/runs_sync}"
DATA_DIR="${DATA_DIR:-/home/ubuntu/data}"

echo "========================================"
echo "NGSCheckMate - RNA Contamination Detection"
echo "========================================"
echo "Input BAM: $INPUT_BAM"
echo "Output dir: $OUTPUT_DIR"
echo "Sample: $SAMPLE_NAME"
echo ""

# Create output directory
mkdir -p -- "$OUTPUT_DIR"

# Docker only bind-mounts a host directory when the -v source is an absolute
# path; a relative source is interpreted as a named volume and the VCF would
# never appear in the requested directory. Resolve it once here and keep
# OUTPUT_DIR untouched for the user-facing messages.
OUTPUT_DIR_ABS="$(cd -- "$OUTPUT_DIR" && pwd)"

# Create temporary BAM list. The EXIT trap removes it on every exit path,
# including the abort that `set -e` triggers when `docker run` fails.
TMP_LIST="$(mktemp)"
trap 'rm -f -- "$TMP_LIST"' EXIT

# NOTE(review): the path is passed through unchanged, so it presumably has to
# be valid *inside* the container (i.e. under /refs, /runs or /data) - confirm
# against the mounts below.
echo "$INPUT_BAM" > "$TMP_LIST"

# Determine mount paths based on input BAM location
# Adjust the volume mounts as needed for your environment
docker run --rm \
  -v "$REFS_DIR":/refs \
  -v "$RUNS_DIR":/runs \
  -v "$DATA_DIR":/data \
  -v "$OUTPUT_DIR_ABS":/output \
  -v "$TMP_LIST":/tmp/bam_list.txt \
  -e NCM_REF=/refs/ctat_genome_lib_build_dir/ref_genome.fa \
  "$DOCKER_IMAGE" \
  bash /usr/local/bin/ncm.py \
  -B \
  -l /tmp/bam_list.txt \
  -bed /usr/local/NGSCheckMate/SNP/SNP_GRCh38_hg38_wChr.bed \
  -O /output \
  -N "$SAMPLE_NAME"

# Cleanup of $TMP_LIST is handled by the EXIT trap installed above.

echo ""
echo "Done! "
echo "VCF output: $OUTPUT_DIR/$SAMPLE_NAME.vcf"