File Info

Filename
nextflow-bin/migrate_containers_to_ecr.sh
Full Path
s3://natera-rnd-pltf-dev-nextflow-scratch-01/work/ea/ece298e70e4508363693947760703c/nextflow-bin/migrate_containers_to_ecr.sh
Size
11.2 KB
Attempt
#!/bin/bash

# RNAFusion Container Migration Script
# This script pulls all containers used in the rnafusion pipeline from their
# source registries (quay.io and docker.io) and pushes them to AWS ECR

set -euo pipefail

# Configuration
# Each value may be supplied through the environment; the fallbacks below apply
# when the variable is unset or empty.
# AWS_REGION   - region passed to the aws CLI. ecr_login replaces it with the
#                region embedded in ECR_REGISTRY when the two differ.
# ECR_REGISTRY - target registry host. Empty by default; check_prerequisites
#                refuses to continue while it is empty.
# DRY_RUN      - "true" makes repository creation and image migration log what
#                they would do instead of doing it; --dry-run sets it as well.
AWS_REGION="${AWS_REGION:-us-west-2}"
ECR_REGISTRY="${ECR_REGISTRY:-}"
DRY_RUN="${DRY_RUN:-false}"

# Colors for output
# ANSI escape sequences kept as literal backslash text (single-quoted); the
# logging functions expand them when they print.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging functions
# Print an informational line to stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
log_info() {
    local line="${BLUE}[INFO]${NC} $1"
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "$line"
}

# Print a success line to stdout, prefixed with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
log_success() {
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning line to stderr, prefixed with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
# Warnings are diagnostics, so they go to stderr and stay out of any captured
# stdout. Only the colour codes are escape-expanded (%b); the message itself is
# printed with %s so backslashes in it are not reinterpreted.
log_warning() {
    printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

# Print an error line to stderr, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Errors are diagnostics, so they go to stderr and stay out of any captured
# stdout. Only the colour codes are escape-expanded (%b); the message itself is
# printed with %s so backslashes in it are not reinterpreted.
log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}

# Verify that the tools and settings the migration relies on are present.
# Globals: ECR_REGISTRY (read)
# Checks, in order: docker on PATH, aws on PATH, working AWS credentials
# (aws sts get-caller-identity), and a non-empty ECR_REGISTRY.
# Exits the script with status 1 at the first check that fails.
check_prerequisites() {
    log_info "Checking prerequisites..."

    command -v docker > /dev/null 2>&1 || {
        log_error "Docker is not installed or not in PATH"
        exit 1
    }

    command -v aws > /dev/null 2>&1 || {
        log_error "AWS CLI is not installed or not in PATH"
        exit 1
    }

    # A successful identity lookup shows the CLI has usable credentials.
    aws sts get-caller-identity > /dev/null 2>&1 || {
        log_error "AWS CLI is not configured or credentials are invalid"
        exit 1
    }

    if [[ -z "$ECR_REGISTRY" ]]; then
        log_error "ECR_REGISTRY environment variable is not set"
        log_info "Please set ECR_REGISTRY to your ECR registry URL (e.g., 123456789012.dkr.ecr.us-east-1.amazonaws.com)"
        exit 1
    fi

    log_success "Prerequisites check passed"
}

# Authenticate the local Docker client against the target ECR registry.
# Globals: ECR_REGISTRY (read), AWS_REGION (read; overwritten with the region
#          found in ECR_REGISTRY when the two differ)
# Steps: validate the registry host name, reconcile the region, probe ECR with
# describe-registry, then pipe an ECR password into `docker login`.
# Exits the script with status 1 if any step fails.
# Note: this runs in dry run mode as well; nothing here consults DRY_RUN.
ecr_login() {
    log_info "Logging in to ECR..."

    # <12-digit account>.dkr.ecr.<region>.amazonaws.com; the group captures the region.
    local registry_pattern='^[0-9]{12}\.dkr\.ecr\.([a-z0-9-]+)\.amazonaws\.com$'
    if ! [[ "$ECR_REGISTRY" =~ $registry_pattern ]]; then
        log_error "Invalid ECR registry format: $ECR_REGISTRY"
        log_info "Expected format: 123456789012.dkr.ecr.region.amazonaws.com"
        exit 1
    fi

    # The registry host name is authoritative for the region.
    local registry_region="${BASH_REMATCH[1]}"
    if [[ "$registry_region" != "$AWS_REGION" ]]; then
        log_warning "ECR registry region ($registry_region) doesn't match AWS_REGION ($AWS_REGION)"
        log_info "Using region from ECR registry: $registry_region"
        AWS_REGION="$registry_region"
    fi

    log_info "Testing ECR access..."
    aws ecr describe-registry --region "$AWS_REGION" > /dev/null 2>&1 || {
        log_error "Cannot access ECR in region $AWS_REGION"
        log_info "Please check your AWS credentials and ECR permissions"
        exit 1
    }

    # The password travels over stdin so it never appears in the process list.
    aws ecr get-login-password --region "$AWS_REGION" \
        | docker login --username AWS --password-stdin "$ECR_REGISTRY" \
        || {
            log_error "Failed to login to ECR"
            log_info "Please check:"
            log_info "1. ECR registry URL is correct: $ECR_REGISTRY"
            log_info "2. AWS credentials have ECR permissions"
            log_info "3. Docker is running and accessible"
            exit 1
        }

    log_success "Successfully logged in to ECR"
}


# Pull and push a single container
# Globals:   DRY_RUN (read)
# Arguments: $1 - source image reference to pull
#            $2 - target image reference to tag and push
# Returns:   0 when the image was pushed (or in dry run mode),
#            1 when the pull, tag, or push step fails
# Every docker step is checked explicitly: the caller invokes this function
# inside an `if`, where `set -e` is inactive, so an unchecked failure would
# fall through to the next step.
migrate_container() {
    local source_image="$1"
    local target_image="$2"

    log_info "Migrating $source_image to $target_image"

    if [ "$DRY_RUN" = "true" ]; then
        log_info "DRY RUN: Would pull $source_image and push to $target_image"
        return 0
    fi

    # Pull the source image
    log_info "Pulling $source_image..."
    if ! docker pull "$source_image"; then
        log_error "Failed to pull $source_image"
        return 1
    fi

    # Tag the image for ECR; without a fresh tag the push below could upload a
    # stale local image that happens to carry the same name.
    log_info "Tagging image for ECR..."
    if ! docker tag "$source_image" "$target_image"; then
        log_error "Failed to tag $source_image as $target_image"
        # Best-effort cleanup so failed images do not accumulate on disk.
        docker rmi "$source_image" > /dev/null 2>&1 || true
        return 1
    fi

    # Push to ECR
    log_info "Pushing $target_image to ECR..."
    if ! docker push "$target_image"; then
        log_error "Failed to push $target_image to ECR"
        # Best-effort cleanup so failed images do not accumulate on disk.
        docker rmi "$source_image" "$target_image" > /dev/null 2>&1 || true
        return 1
    fi

    log_success "Successfully migrated $source_image to $target_image"

    # Clean up local images to save space (best effort; failure is not an error)
    log_info "Cleaning up local images..."
    docker rmi "$source_image" "$target_image" 2>/dev/null || true
}

# Main container list - all containers found in rnafusion pipeline
# Each entry is a full image reference of the form <registry>/<repository>:<tag>.
# The functions below strip the leading quay.io/ or docker.io/ to obtain the
# ECR repository name and keep the text after the first ':' as the tag.
declare -a CONTAINERS=(
    # nf-core containers (quay.io)
    "quay.io/nf-core/ctatsplicing:0.0.3"
    "quay.io/nf-core/bclconvert:4.3.6"
    "quay.io/nf-core/ubuntu:20.04"
    
    # trinityctat containers (docker.io - not available on quay.io)
    "docker.io/trinityctat/starfusion:1.12.0"
    
    # clinicalgenomics containers (docker.io - not available on quay.io)
    "docker.io/clinicalgenomics/fusion-report:2.1.8"
    "docker.io/clinicalgenomics/fusioncatcher:1.33"
    
    # biocontainers (quay.io)
    "quay.io/biocontainers/agat:1.2.0--pl5321hdfd78af_0"
    "quay.io/biocontainers/arriba:2.4.0--h0033a41_2"
    "quay.io/biocontainers/bedops:2.4.41--h9f5acd7_0"
    "quay.io/biocontainers/fastp:0.23.4--h5f740d0_0"
    "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
    "quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0"
    "quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5"
    "quay.io/biocontainers/kallisto:0.51.1--heb0cbe2_0"
    "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0"
    "quay.io/biocontainers/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0"
    "quay.io/biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0"
    "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
    "quay.io/biocontainers/pandas:1.5.2"
    "quay.io/biocontainers/perl:5.26.2"
    "quay.io/biocontainers/picard:3.1.0--hdfd78af_0"
    "quay.io/biocontainers/pigz:2.3.4"
    "quay.io/biocontainers/preseq:3.2.0--hdcf5f25_6"
    "quay.io/biocontainers/python:3.9--1"
    "quay.io/biocontainers/rseqc:5.0.3--py39hf95cd2a_0"
    # samtools is listed with two tags; both map to the same repository name
    "quay.io/biocontainers/samtools:1.17--h00cdaf9_0"
    "quay.io/biocontainers/samtools:1.21--h50ea8bc_0"
    "quay.io/biocontainers/seqtk:1.4--he4a0461_1"
    "quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
    "quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5"
    # not a biocontainers image (tempralla namespace on quay.io)
    # NOTE(review): the only entry on a floating `latest` tag - consider pinning
    "quay.io/tempralla/viptool:latest"
    
    # nextflow container (docker.io - not available on quay.io)
    "docker.io/nextflow/nextflow:25.04.2"
)

# Create ECR repositories
# Ensures one ECR repository exists for every image in CONTAINERS.
# Globals: CONTAINERS, AWS_REGION, DRY_RUN (all read)
# The repository name is the image reference without its leading quay.io/ or
# docker.io/ and without the tag
# (e.g., quay.io/nf-core/ctatsplicing:0.0.3 -> nf-core/ctatsplicing).
# In dry run mode it only logs the names. Exits the script with status 1 when
# a repository cannot be created, after reporting what the AWS CLI said.
create_ecr_repositories() {
    log_info "Creating ECR repositories..."

    local container repo_name create_output

    for container in "${CONTAINERS[@]}"; do
        # Handle both quay.io and docker.io sources, then drop the tag.
        repo_name="${container#quay.io/}"
        repo_name="${repo_name#docker.io/}"
        repo_name="${repo_name%%:*}"

        if [ "$DRY_RUN" = "true" ]; then
            log_info "DRY RUN: Would create ECR repository: $repo_name"
            continue
        fi

        # Check if repository exists
        if aws ecr describe-repositories --repository-names "$repo_name" --region "$AWS_REGION" &> /dev/null; then
            log_info "ECR repository $repo_name already exists"
            continue
        fi

        log_info "Creating ECR repository: $repo_name"
        # Capture stderr only (stdout is discarded) so a failure can be explained.
        if create_output=$(aws ecr create-repository --repository-name "$repo_name" --region "$AWS_REGION" 2>&1 > /dev/null); then
            log_success "Created ECR repository: $repo_name"
        else
            log_error "Failed to create ECR repository: $repo_name"
            if [ -n "$create_output" ]; then
                log_error "AWS CLI reported: $create_output"
            fi
            exit 1
        fi
    done
}

# Test ECR connectivity
# Globals: AWS_REGION (read)
# Issues a small describe-repositories request (at most one item) and exits
# the script with status 1 when the call fails.
# Note: this runs in dry run mode as well; nothing here consults DRY_RUN.
test_ecr_connectivity() {
    log_info "Testing ECR connectivity..."

    # Only the exit status matters; all CLI output is discarded.
    if ! aws ecr describe-repositories --region "$AWS_REGION" --max-items 1 > /dev/null 2>&1; then
        log_error "ECR connectivity test failed"
        log_info "Please check your AWS credentials and ECR permissions"
        exit 1
    fi

    log_success "ECR connectivity test passed"
}

# Main migration function
# Migrates every image in CONTAINERS to ECR_REGISTRY, one at a time.
# Globals: CONTAINERS, ECR_REGISTRY (read)
# Returns: 0 when every image was migrated, 1 when at least one failed.
# A failed image does not stop the loop; the remaining images are still
# attempted and the failures are listed at the end.
migrate_containers() {
    log_info "Starting container migration..."

    # Test ECR connectivity first
    test_ecr_connectivity

    local success_count=0
    local total_count=${#CONTAINERS[@]}
    local -a failed_images=()
    local container image_path repo_name tag target_image

    for container in "${CONTAINERS[@]}"; do
        # Extract repository name and tag (e.g., quay.io/nf-core/ctatsplicing:0.0.3 -> nf-core/ctatsplicing:0.0.3)
        # Handle both quay.io and docker.io sources
        image_path="${container#quay.io/}"
        image_path="${image_path#docker.io/}"
        repo_name="${image_path%%:*}"
        if [[ "$image_path" == *:* ]]; then
            tag="${image_path#*:}"
        else
            # An untagged reference is pulled as :latest, so push it as :latest too.
            tag="latest"
        fi
        target_image="$ECR_REGISTRY/$repo_name:$tag"

        if migrate_container "$container" "$target_image"; then
            success_count=$((success_count + 1))
        else
            log_error "Failed to migrate $container"
            failed_images+=("$container")
        fi

        echo "Progress: $success_count/$total_count containers migrated"
        echo "----------------------------------------"
    done

    # Report failures honestly instead of declaring success unconditionally.
    if (( success_count < total_count )); then
        log_error "Migration completed with failures: $success_count/$total_count containers successfully migrated"
        for container in "${failed_images[@]}"; do
            log_error "Not migrated: $container"
        done
        return 1
    fi

    log_success "Migration completed: $success_count/$total_count containers successfully migrated"
}


# Usage information
# Prints the help text to stdout. The here-document delimiter is unquoted on
# purpose so that $0 expands to the name the script was invoked with.
show_usage() {
    cat << EOF
RNAFusion Container Migration Script

This script migrates all containers used in the rnafusion pipeline from their
source registries (quay.io and docker.io) to AWS ECR.

Usage:
    $0 [OPTIONS]

Environment Variables:
    ECR_REGISTRY      ECR registry URL (required)
    AWS_REGION        AWS region (default: us-west-2)
    DRY_RUN           Set to 'true' for dry run (default: false)

Options:
    -h, --help        Show this help message
    --dry-run         Perform a dry run without actually migrating containers
    --create-repos    Only create ECR repositories (don't migrate containers)

Examples:
    # Set ECR registry and run migration
    export ECR_REGISTRY="123456789012.dkr.ecr.us-east-1.amazonaws.com"
    $0

    # Dry run to see what would be migrated
    $0 --dry-run

    # Only create ECR repositories
    $0 --create-repos

Prerequisites:
    - Docker installed and running
    - AWS CLI configured with appropriate permissions
    - ECR_REGISTRY environment variable set

Required AWS Permissions:
    - ecr:DescribeRegistry
    - ecr:DescribeRepositories
    - ecr:CreateRepository
    - ecr:GetAuthorizationToken
    - ecr:BatchCheckLayerAvailability
    - ecr:GetDownloadUrlForLayer
    - ecr:BatchGetImage
    - ecr:InitiateLayerUpload
    - ecr:UploadLayerPart
    - ecr:CompleteLayerUpload
    - ecr:PutImage

EOF
}

# Main script logic
# Arguments: any of -h/--help, --dry-run, --create-repos (see show_usage)
# Globals:   DRY_RUN (set to "true" by --dry-run), ECR_REGISTRY (read),
#            AWS_REGION (overwritten in --create-repos mode when the registry
#            host names a different region)
# Every argument is examined, so --dry-run and --create-repos can be given
# together. An unrecognised argument prints the usage text and exits with 1.
main() {
    local create_repos_only="false"
    local arg

    for arg in "$@"; do
        case "$arg" in
            -h|--help)
                show_usage
                exit 0
                ;;
            --dry-run)
                DRY_RUN="true"
                log_info "Running in DRY RUN mode"
                ;;
            --create-repos)
                create_repos_only="true"
                ;;
            "")
                # Normal execution
                ;;
            *)
                log_error "Unknown option: $arg"
                show_usage
                exit 1
                ;;
        esac
    done

    check_prerequisites

    if [ "$create_repos_only" = "true" ]; then
        # This mode does not go through ecr_login, so the region named in the
        # registry host must be applied here; otherwise the repositories would
        # be created in the default AWS_REGION instead of the registry's region.
        local region_pattern='\.dkr\.ecr\.([a-z0-9-]+)\.amazonaws\.com$'
        local registry_region
        if [[ "$ECR_REGISTRY" =~ $region_pattern ]]; then
            registry_region="${BASH_REMATCH[1]}"
            if [ "$registry_region" != "$AWS_REGION" ]; then
                log_warning "ECR registry region ($registry_region) doesn't match AWS_REGION ($AWS_REGION)"
                log_info "Using region from ECR registry: $registry_region"
                AWS_REGION="$registry_region"
            fi
        fi
        create_ecr_repositories
        exit 0
    fi

    ecr_login
    create_ecr_repositories
    migrate_containers

    log_success "Migration script completed successfully!"
    log_info "Next steps:"
    log_info "1. Update your Nextflow configuration to use ECR containers"
    log_info "2. Test the pipeline with the new ECR containers"
}

# Run main function with all arguments
# "$@" is quoted so each command-line argument reaches main as its own word.
main "$@"