#!/bin/bash

# RNAFusion Container Migration Script
#
# Pulls every container image listed in CONTAINERS (hosted on quay.io or
# docker.io) and pushes it to AWS ECR.
#
# Environment:
#   ECR_REGISTRY  ECR registry host (required),
#                 e.g. 123456789012.dkr.ecr.us-east-1.amazonaws.com
#   AWS_REGION    AWS region (default: us-west-2); overridden by the region
#                 embedded in ECR_REGISTRY when the two differ
#   DRY_RUN       'true' to only print what would be done (default: false)

set -euo pipefail

# Configuration
AWS_REGION="${AWS_REGION:-us-west-2}"
ECR_REGISTRY="${ECR_REGISTRY:-}"
DRY_RUN="${DRY_RUN:-false}"

# Colors for output (expanded through printf's %b)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Logging functions.
# Each takes the message as $1. Informational and success messages go to
# stdout; warnings and errors go to stderr so they are not lost when stdout
# is redirected or captured.
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}

log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}

#######################################
# Verify that docker and the AWS CLI are available, that AWS credentials
# work, and that ECR_REGISTRY is set. Exits 1 on the first failed check.
# Globals: ECR_REGISTRY (read)
#######################################
check_prerequisites() {
  log_info "Checking prerequisites..."

  if ! command -v docker &> /dev/null; then
    log_error "Docker is not installed or not in PATH"
    exit 1
  fi

  if ! command -v aws &> /dev/null; then
    log_error "AWS CLI is not installed or not in PATH"
    exit 1
  fi

  if ! aws sts get-caller-identity &> /dev/null; then
    log_error "AWS CLI is not configured or credentials are invalid"
    exit 1
  fi

  if [[ -z "$ECR_REGISTRY" ]]; then
    log_error "ECR_REGISTRY environment variable is not set"
    log_info "Please set ECR_REGISTRY to your ECR registry URL (e.g., 123456789012.dkr.ecr.us-east-1.amazonaws.com)"
    exit 1
  fi

  log_success "Prerequisites check passed"
}

#######################################
# Validate the ECR_REGISTRY format and make AWS_REGION agree with the region
# embedded in it. Exits 1 when the registry host is malformed.
# Shared by the login path and the --create-repos path so that repositories
# are always created in the region the images are later pushed to.
# Globals: ECR_REGISTRY (read), AWS_REGION (read, possibly rewritten)
#######################################
resolve_ecr_region() {
  # Kept in a variable so the regex is not subject to quoting surprises.
  local registry_pattern='^[0-9]{12}\.dkr\.ecr\.([a-z0-9-]+)\.amazonaws\.com$'
  local registry_region

  if [[ "$ECR_REGISTRY" =~ $registry_pattern ]]; then
    registry_region="${BASH_REMATCH[1]}"
  else
    log_error "Invalid ECR registry format: $ECR_REGISTRY"
    log_info "Expected format: 123456789012.dkr.ecr.region.amazonaws.com"
    exit 1
  fi

  if [[ "$registry_region" != "$AWS_REGION" ]]; then
    log_warning "ECR registry region ($registry_region) doesn't match AWS_REGION ($AWS_REGION)"
    log_info "Using region from ECR registry: $registry_region"
    AWS_REGION="$registry_region"
  fi
}

#######################################
# Validate the registry, check ECR access, and log docker in to ECR.
# Exits 1 on any failure.
# Note: nothing here consults DRY_RUN, so the login is attempted in dry-run
# mode as well.
# Globals: ECR_REGISTRY (read), AWS_REGION (read, possibly rewritten)
#######################################
ecr_login() {
  log_info "Logging in to ECR..."

  resolve_ecr_region

  # Test ECR access
  log_info "Testing ECR access..."
  if ! aws ecr describe-registry --region "$AWS_REGION" &> /dev/null; then
    log_error "Cannot access ECR in region $AWS_REGION"
    log_info "Please check your AWS credentials and ECR permissions"
    exit 1
  fi

  # Perform Docker login; the password travels over stdin, never argv.
  if aws ecr get-login-password --region "$AWS_REGION" \
    | docker login --username AWS --password-stdin "$ECR_REGISTRY"; then
    log_success "Successfully logged in to ECR"
  else
    log_error "Failed to login to ECR"
    log_info "Please check:"
    log_info "1. ECR registry URL is correct: $ECR_REGISTRY"
    log_info "2. AWS credentials have ECR permissions"
    log_info "3. Docker is running and accessible"
    exit 1
  fi
}

#######################################
# Pull one image, retag it for ECR and push it.
# Arguments: $1 - source image reference
#            $2 - target (ECR) image reference
# Globals:   DRY_RUN (read)
# Returns:   0 on success (or dry run), 1 if pull, tag or push fails
#######################################
migrate_container() {
  local source_image="$1"
  local target_image="$2"

  log_info "Migrating $source_image to $target_image"

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would pull $source_image and push to $target_image"
    return 0
  fi

  # Pull the source image
  log_info "Pulling $source_image..."
  if ! docker pull "$source_image"; then
    log_error "Failed to pull $source_image"
    return 1
  fi

  # Tag the image for ECR. Checked explicitly: this function is called from
  # an `if` condition, where `set -e` does not abort on failures.
  log_info "Tagging image for ECR..."
  if ! docker tag "$source_image" "$target_image"; then
    log_error "Failed to tag $source_image as $target_image"
    return 1
  fi

  # Push to ECR
  log_info "Pushing $target_image to ECR..."
  if ! docker push "$target_image"; then
    log_error "Failed to push $target_image to ECR"
    return 1
  fi

  log_success "Successfully migrated $source_image to $target_image"

  # Clean up local images to save space. Deliberately best-effort: a failed
  # removal must not fail the migration.
  log_info "Cleaning up local images..."
  docker rmi "$source_image" "$target_image" 2>/dev/null || true
}

# Main container list - all containers found in rnafusion pipeline
declare -a CONTAINERS=(
  # nf-core containers (quay.io)
  "quay.io/nf-core/ctatsplicing:0.0.3"
  "quay.io/nf-core/bclconvert:4.3.6"
  "quay.io/nf-core/ubuntu:20.04"

  # trinityctat containers (docker.io - not available on quay.io)
  "docker.io/trinityctat/starfusion:1.12.0"

  # clinicalgenomics containers (docker.io - not available on quay.io)
  "docker.io/clinicalgenomics/fusion-report:2.1.8"
  "docker.io/clinicalgenomics/fusioncatcher:1.33"

  # biocontainers (quay.io)
  "quay.io/biocontainers/agat:1.2.0--pl5321hdfd78af_0"
  "quay.io/biocontainers/arriba:2.4.0--h0033a41_2"
  "quay.io/biocontainers/bedops:2.4.41--h9f5acd7_0"
  "quay.io/biocontainers/fastp:0.23.4--h5f740d0_0"
  "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
  "quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0"
  "quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5"
  "quay.io/biocontainers/kallisto:0.51.1--heb0cbe2_0"
  "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0"
  "quay.io/biocontainers/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0"
  "quay.io/biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0"
  "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
  "quay.io/biocontainers/pandas:1.5.2"
  "quay.io/biocontainers/perl:5.26.2"
  "quay.io/biocontainers/picard:3.1.0--hdfd78af_0"
  "quay.io/biocontainers/pigz:2.3.4"
  "quay.io/biocontainers/preseq:3.2.0--hdcf5f25_6"
  "quay.io/biocontainers/python:3.9--1"
  "quay.io/biocontainers/rseqc:5.0.3--py39hf95cd2a_0"
  "quay.io/biocontainers/samtools:1.17--h00cdaf9_0"
  "quay.io/biocontainers/samtools:1.21--h50ea8bc_0"
  "quay.io/biocontainers/seqtk:1.4--he4a0461_1"
  "quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
  "quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5"
  "quay.io/tempralla/viptool:latest"

  # nextflow container (docker.io - not available on quay.io)
  "docker.io/nextflow/nextflow:25.04.2"
)

#######################################
# Print the ECR repository name for an image reference: the leading
# quay.io/ or docker.io/ host is dropped, as is everything from the first
# ':' onwards.
#   quay.io/nf-core/ctatsplicing:0.0.3 -> nf-core/ctatsplicing
# Arguments: $1 - image reference
# Outputs:   repository name on stdout
#######################################
ecr_repo_name() {
  local image="$1"
  image="${image#quay.io/}"
  image="${image#docker.io/}"
  printf '%s\n' "${image%%:*}"
}

#######################################
# Create one ECR repository per entry in CONTAINERS, skipping repositories
# that already exist. In dry-run mode only logs what would be created.
# Exits 1 if a repository cannot be created.
# Globals: CONTAINERS, DRY_RUN, AWS_REGION (all read)
#######################################
create_ecr_repositories() {
  log_info "Creating ECR repositories..."

  local container repo_name
  for container in "${CONTAINERS[@]}"; do
    repo_name="$(ecr_repo_name "$container")"

    if [[ "$DRY_RUN" == "true" ]]; then
      log_info "DRY RUN: Would create ECR repository: $repo_name"
      continue
    fi

    # Check if repository exists
    if aws ecr describe-repositories --repository-names "$repo_name" \
      --region "$AWS_REGION" &> /dev/null; then
      log_info "ECR repository $repo_name already exists"
      continue
    fi

    log_info "Creating ECR repository: $repo_name"
    if aws ecr create-repository --repository-name "$repo_name" \
      --region "$AWS_REGION" &> /dev/null; then
      log_success "Created ECR repository: $repo_name"
    else
      log_error "Failed to create ECR repository: $repo_name"
      exit 1
    fi
  done
}

#######################################
# Run a cheap ECR API call to confirm the service is reachable with the
# current credentials. Exits 1 on failure. Not gated on DRY_RUN.
# Globals: AWS_REGION (read)
#######################################
test_ecr_connectivity() {
  log_info "Testing ECR connectivity..."

  # Test with a simple ECR command
  if aws ecr describe-repositories --region "$AWS_REGION" --max-items 1 &> /dev/null; then
    log_success "ECR connectivity test passed"
  else
    log_error "ECR connectivity test failed"
    log_info "Please check your AWS credentials and ECR permissions"
    exit 1
  fi
}

#######################################
# Migrate every image in CONTAINERS to ECR_REGISTRY, continuing past
# individual failures and printing a running progress line.
# Globals: CONTAINERS, ECR_REGISTRY (read)
# Returns: 0 if every image migrated, 1 if at least one failed
#######################################
migrate_containers() {
  log_info "Starting container migration..."

  # Test ECR connectivity first
  test_ecr_connectivity

  local success_count=0
  local total_count=${#CONTAINERS[@]}
  local container repo_name tag target_image

  for container in "${CONTAINERS[@]}"; do
    # quay.io/nf-core/ctatsplicing:0.0.3 -> <registry>/nf-core/ctatsplicing:0.0.3
    repo_name="$(ecr_repo_name "$container")"
    tag="${container#*:}"
    target_image="$ECR_REGISTRY/$repo_name:$tag"

    if migrate_container "$container" "$target_image"; then
      success_count=$((success_count + 1))
    else
      log_error "Failed to migrate $container"
    fi

    echo "Progress: $success_count/$total_count containers migrated"
    echo "----------------------------------------"
  done

  if ((success_count < total_count)); then
    log_warning "Migration completed: $success_count/$total_count containers successfully migrated"
    return 1
  fi

  log_success "Migration completed: $success_count/$total_count containers successfully migrated"
}

# Usage information
show_usage() {
  cat << EOF
RNAFusion Container Migration Script

This script migrates all containers used in the rnafusion pipeline from quay.io to AWS ECR.

Usage: $0 [OPTIONS]

Environment Variables:
    ECR_REGISTRY    ECR registry URL (required)
    AWS_REGION      AWS region (default: us-west-2)
    DRY_RUN         Set to 'true' for dry run (default: false)

Options:
    -h, --help      Show this help message
    --dry-run       Perform a dry run without actually migrating containers
    --create-repos  Only create ECR repositories (don't migrate containers)

Examples:
    # Set ECR registry and run migration
    export ECR_REGISTRY="123456789012.dkr.ecr.us-east-1.amazonaws.com"
    $0

    # Dry run to see what would be migrated
    $0 --dry-run

    # Only create ECR repositories
    $0 --create-repos

Prerequisites:
    - Docker installed and running
    - AWS CLI configured with appropriate permissions
    - ECR_REGISTRY environment variable set

Required AWS Permissions:
    - ecr:CreateRepository
    - ecr:DescribeRepositories
    - ecr:DescribeRegistry
    - ecr:GetAuthorizationToken
    - ecr:BatchCheckLayerAvailability
    - ecr:GetDownloadUrlForLayer
    - ecr:BatchGetImage
    - ecr:InitiateLayerUpload
    - ecr:UploadLayerPart
    - ecr:CompleteLayerUpload
    - ecr:PutImage
EOF
}

#######################################
# Entry point: parse options, then either create repositories only or run
# the full login / create / migrate sequence.
# Arguments: command-line options (see show_usage); every argument is
#            parsed, so --dry-run and --create-repos can be combined
# Exits:     0 on success, 1 on an unknown option or any failure, including
#            one or more images failing to migrate
#######################################
main() {
  local create_only="false"
  local arg

  for arg in "$@"; do
    case "$arg" in
      -h | --help)
        show_usage
        exit 0
        ;;
      --dry-run)
        DRY_RUN="true"
        log_info "Running in DRY RUN mode"
        ;;
      --create-repos)
        create_only="true"
        ;;
      "")
        # An empty argument is treated like no argument at all.
        ;;
      *)
        log_error "Unknown option: $arg"
        show_usage
        exit 1
        ;;
    esac
  done

  check_prerequisites

  if [[ "$create_only" == "true" ]]; then
    # Reconcile AWS_REGION with the registry first so the repositories land
    # in the region the images will be pushed to.
    resolve_ecr_region
    create_ecr_repositories
    exit 0
  fi

  ecr_login
  create_ecr_repositories

  if ! migrate_containers; then
    log_error "Migration finished with failures; see the errors above"
    exit 1
  fi

  log_success "Migration script completed successfully!"
  log_info "Next steps:"
  log_info "1. Update your Nextflow configuration to use ECR containers"
  log_info "2. Test the pipeline with the new ECR containers"
}

# Run main function with all arguments
main "$@"