File Info

Filename
nextflow-bin/run_ngscheckmate.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/ce/ea0686309417ad545f412df63a99ae/nextflow-bin/run_ngscheckmate.sh
Size
2.1 KB
Attempt
#!/bin/bash
# run_ngscheckmate.sh - Run NGSCheckMate on a single BAM file
# 
# This script generates a VCF file with genotype information at ~17,000 common
# SNP positions using NGSCheckMate. The VCF output is used for RNA contamination
# detection via VAF analysis.
#
# Usage: ./run_ngscheckmate.sh <input.bam> <output_dir>
#
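# Requirements:
#   - Docker
#   - Reference genome available inside the container at
#     /refs/ctat_genome_lib_build_dir/ref_genome.fa (host directory: $REFS_DIR)
#   - BAM file directory mounted into the container via $RUNS_DIR (/runs) or
#     $DATA_DIR (/data); <input.bam> must be a path valid inside the container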
#
# Example:
#   ./run_ngscheckmate.sh /data/samples/sample1.bam /data/results/

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Both positional arguments are required. The usage text is a diagnostic,
# so it is written to stderr and stdout stays free for normal output.
if [ "$#" -lt 2 ]; then
    {
        echo "Usage: $0 <input.bam> <output_dir>"
        echo ""
        echo "Arguments:"
        echo "  input.bam   - Path to input BAM file"
        echo "  output_dir  - Directory for output VCF file"
    } >&2
    exit 1
fi

# $1: BAM path, $2: directory that receives the output.
INPUT_BAM="$1"
OUTPUT_DIR="$2"
# Sample name = BAM file name without its directory and ".bam" suffix.
# "--" keeps a path that starts with "-" from being parsed as an option.
SAMPLE_NAME=$(basename -- "$INPUT_BAM" .bam)

# Container image that provides NGSCheckMate.
DOCKER_IMAGE='quay.io/biocontainers/ngscheckmate:1.0.0--py27r41hdfd78af_3'

# Host directories used as volume mount sources. Each one keeps a non-empty
# value already present in the environment and otherwise falls back to the
# default given here (customize these for your environment).
: "${REFS_DIR:=/home/ubuntu/references}"
: "${RUNS_DIR:=/home/ubuntu/runs_sync}"
: "${DATA_DIR:=/home/ubuntu/data}"

# Print a run banner: title, the three resolved inputs, and a trailing
# blank line. Each argument to printf becomes one output line.
printf '%s\n' \
    "========================================" \
    "NGSCheckMate - RNA Contamination Detection" \
    "========================================" \
    "Input BAM: $INPUT_BAM" \
    "Output dir: $OUTPUT_DIR" \
    "Sample: $SAMPLE_NAME" \
    ""

# Create the output directory, then resolve it to an absolute path.
# docker's -v option treats a source that is not a path (e.g. "results") as
# a named volume instead of a host directory, so a relative OUTPUT_DIR would
# not receive the output files. CDPATH is cleared so that cd cannot jump to
# a same-named directory elsewhere or print extra output.
mkdir -p -- "$OUTPUT_DIR"
OUTPUT_DIR="$(CDPATH= cd -- "$OUTPUT_DIR" && pwd)"

# Write the BAM path to a temporary one-line list file. The EXIT trap
# removes the file on every exit path, including an abort caused by a
# failing command later in the script.
TMP_LIST="$(mktemp)"
trap 'rm -f -- "$TMP_LIST"' EXIT
printf '%s\n' "$INPUT_BAM" > "$TMP_LIST"

# Options for `docker run`: remove the container afterwards (--rm), mount
# the host reference, runs, data and output directories at /refs, /runs,
# /data and /output, mount the BAM list at /tmp/bam_list.txt, and set
# NCM_REF to the reference FASTA path inside the container.
# Adjust the volume mounts as needed for your environment.
docker_opts=(
    --rm
    -v "$REFS_DIR":/refs
    -v "$RUNS_DIR":/runs
    -v "$DATA_DIR":/data
    -v "$OUTPUT_DIR":/output
    -v "$TMP_LIST":/tmp/bam_list.txt
    -e NCM_REF=/refs/ctat_genome_lib_build_dir/ref_genome.fa
)

# Command executed inside the container. All paths here are container
# paths: the mounted BAM list, the SNP BED file in the image, and /output.
ncm_cmd=(
    bash /usr/local/bin/ncm.py
    -B
    -l /tmp/bam_list.txt
    -bed /usr/local/NGSCheckMate/SNP/SNP_GRCh38_hg38_wChr.bed
    -O /output
    -N "$SAMPLE_NAME"
)

docker run "${docker_opts[@]}" "$DOCKER_IMAGE" "${ncm_cmd[@]}"

# Remove the temporary BAM list now that the container run has finished.
rm -f "$TMP_LIST"

# Emit a blank separator line, then the expected location of the VCF.
printf '\n%s\n' "Done! VCF output: $OUTPUT_DIR/$SAMPLE_NAME.vcf"