#!/bin/bash
# RNAFusion Container Migration Script
# This script pulls all containers used in the rnafusion pipeline from their
# source registries (quay.io and docker.io) and pushes them to AWS ECR.
set -euo pipefail
# Configuration: every setting can be supplied through the environment; the
# defaults below are applied only when a variable is unset or empty.
: "${AWS_REGION:=us-west-2}" # region passed to the AWS CLI calls
: "${ECR_REGISTRY:=}"        # target ECR registry host (empty = not provided)
: "${DRY_RUN:=false}"        # "true" makes the script only log its actions
# Colors for output
# init_colors assigns the color variables used by the logging helpers.
# Escape sequences are enabled only when NO_COLOR is unset/empty and both
# stdout and stderr are terminals, so piped or redirected logs stay free of
# raw ANSI codes. When enabled, the values are the literal backslash
# sequences (not raw ESC bytes); the logging helpers expand them at print time.
# Globals written: RED, GREEN, YELLOW, BLUE, NC
init_colors() {
  if [[ -z "${NO_COLOR:-}" && -t 1 && -t 2 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    BLUE='\033[0;34m'
    NC='\033[0m' # No Color
  else
    RED=''
    GREEN=''
    YELLOW=''
    BLUE=''
    NC=''
  fi
}
init_colors
# Logging functions
# log_info MESSAGE
# Writes MESSAGE to stdout behind a BLUE "[INFO]" tag. Backslash escapes in
# the color variables and in MESSAGE are expanded when printing.
log_info() {
  local message=$1
  printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
# log_success MESSAGE
# Writes MESSAGE to stdout behind a GREEN "[SUCCESS]" tag. Backslash escapes
# in the color variables and in MESSAGE are expanded when printing.
log_success() {
  local message=$1
  printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
# log_warning MESSAGE
# Writes MESSAGE behind a YELLOW "[WARNING]" tag. Warnings are diagnostics,
# so they go to stderr and remain visible when stdout is piped or redirected.
# Backslash escapes in the color variables and in MESSAGE are expanded.
log_warning() {
  local message=$1
  printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}" >&2
}
# log_error MESSAGE
# Writes MESSAGE behind a RED "[ERROR]" tag. Errors go to stderr so they are
# not mixed into captured or redirected stdout.
# Backslash escapes in the color variables and in MESSAGE are expanded.
log_error() {
  local message=$1
  printf '%b\n' "${RED}[ERROR]${NC} ${message}" >&2
}
# Check prerequisites
# Confirms, in order, that `docker` and `aws` can be resolved as commands,
# that `aws sts get-caller-identity` succeeds, and that ECR_REGISTRY is
# non-empty. The first failing check is logged and the script exits with
# status 1; otherwise a success message is logged.
check_prerequisites() {
  log_info "Checking prerequisites..."

  command -v docker > /dev/null 2>&1 || {
    log_error "Docker is not installed or not in PATH"
    exit 1
  }

  command -v aws > /dev/null 2>&1 || {
    log_error "AWS CLI is not installed or not in PATH"
    exit 1
  }

  # A successful identity lookup shows the CLI has usable credentials.
  aws sts get-caller-identity > /dev/null 2>&1 || {
    log_error "AWS CLI is not configured or credentials are invalid"
    exit 1
  }

  if [[ -z "$ECR_REGISTRY" ]]; then
    log_error "ECR_REGISTRY environment variable is not set"
    log_info "Please set ECR_REGISTRY to your ECR registry URL (e.g., 123456789012.dkr.ecr.us-east-1.amazonaws.com)"
    exit 1
  fi

  log_success "Prerequisites check passed"
}
# Login to ECR
# Validates the shape of ECR_REGISTRY, aligns AWS_REGION with the region
# embedded in the registry host name, probes ECR access with
# `aws ecr describe-registry`, and finally pipes an ECR password into
# `docker login`. Any failure is logged and ends the script with status 1.
# Globals read: ECR_REGISTRY; read and possibly rewritten: AWS_REGION.
ecr_login() {
  # Same host-name pattern as the format check, with the region label captured.
  local registry_pattern='^[0-9]{12}\.dkr\.ecr\.([a-z0-9-]+)\.amazonaws\.com$'
  local registry_region

  log_info "Logging in to ECR..."

  # Validate ECR registry format
  if ! [[ "$ECR_REGISTRY" =~ $registry_pattern ]]; then
    log_error "Invalid ECR registry format: $ECR_REGISTRY"
    log_info "Expected format: 123456789012.dkr.ecr.region.amazonaws.com"
    exit 1
  fi

  # The region embedded in the registry host name takes precedence over AWS_REGION.
  registry_region=${BASH_REMATCH[1]}
  if [[ "$registry_region" != "$AWS_REGION" ]]; then
    log_warning "ECR registry region ($registry_region) doesn't match AWS_REGION ($AWS_REGION)"
    log_info "Using region from ECR registry: $registry_region"
    AWS_REGION="$registry_region"
  fi

  # Test ECR access
  log_info "Testing ECR access..."
  if ! aws ecr describe-registry --region "$AWS_REGION" > /dev/null 2>&1; then
    log_error "Cannot access ECR in region $AWS_REGION"
    log_info "Please check your AWS credentials and ECR permissions"
    exit 1
  fi

  # Note: the login is performed in dry run mode as well.
  # The password travels over stdin so it never appears in the argument list.
  if ! aws ecr get-login-password --region "$AWS_REGION" \
    | docker login --username AWS --password-stdin "$ECR_REGISTRY"; then
    log_error "Failed to login to ECR"
    log_info "Please check:"
    log_info "1. ECR registry URL is correct: $ECR_REGISTRY"
    log_info "2. AWS credentials have ECR permissions"
    log_info "3. Docker is running and accessible"
    exit 1
  fi

  log_success "Successfully logged in to ECR"
}
# Pull and push a single container
# Arguments:
#   $1 - source image reference to pull
#   $2 - target image reference to tag and push
# Globals read: DRY_RUN ("true" only logs the intended actions)
# Returns: 0 on success or dry run; 1 if the pull, tag or push step fails.
migrate_container() {
  local source_image="$1"
  local target_image="$2"

  log_info "Migrating $source_image to $target_image"

  if [[ "$DRY_RUN" == "true" ]]; then
    log_info "DRY RUN: Would pull $source_image and push to $target_image"
    return 0
  fi

  # Pull the source image
  log_info "Pulling $source_image..."
  if ! docker pull "$source_image"; then
    log_error "Failed to pull $source_image"
    return 1
  fi

  # Tag the image for ECR.
  # The result is checked explicitly: when this function is called from an
  # `if` condition, `set -e` is inactive, and an unnoticed tag failure would
  # let the push run against a missing or stale local tag.
  log_info "Tagging image for ECR..."
  if ! docker tag "$source_image" "$target_image"; then
    log_error "Failed to tag $source_image as $target_image"
    return 1
  fi

  # Push to ECR
  log_info "Pushing $target_image to ECR..."
  if ! docker push "$target_image"; then
    log_error "Failed to push $target_image to ECR"
    return 1
  fi

  log_success "Successfully migrated $source_image to $target_image"

  # Clean up local images to save space. This is best effort: a failed
  # removal must not turn a successful migration into a failure.
  log_info "Cleaning up local images..."
  docker rmi "$source_image" "$target_image" 2>/dev/null || true
}
# Main container list - all containers found in rnafusion pipeline.
# Every entry is a fully qualified reference (registry/namespace/name:tag).
# The list is built group by group; the order of the appends is the order in
# which the images are processed.
CONTAINERS=()

# nf-core containers (quay.io)
CONTAINERS+=(
  "quay.io/nf-core/ctatsplicing:0.0.3"
  "quay.io/nf-core/bclconvert:4.3.6"
  "quay.io/nf-core/ubuntu:20.04"
)

# trinityctat containers (docker.io - not available on quay.io)
CONTAINERS+=(
  "docker.io/trinityctat/starfusion:1.12.0"
)

# clinicalgenomics containers (docker.io - not available on quay.io)
CONTAINERS+=(
  "docker.io/clinicalgenomics/fusion-report:2.1.8"
  "docker.io/clinicalgenomics/fusioncatcher:1.33"
)

# biocontainers (quay.io)
CONTAINERS+=(
  "quay.io/biocontainers/agat:1.2.0--pl5321hdfd78af_0"
  "quay.io/biocontainers/arriba:2.4.0--h0033a41_2"
  "quay.io/biocontainers/bedops:2.4.41--h9f5acd7_0"
  "quay.io/biocontainers/fastp:0.23.4--h5f740d0_0"
  "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
  "quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0"
  "quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5"
  "quay.io/biocontainers/kallisto:0.51.1--heb0cbe2_0"
  "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:019f262d90511939dce2dca4b7c868fc108f73db-0"
  "quay.io/biocontainers/mulled-v2-cf0123ef83b3c38c13e3b0696a3f285d3f20f15b:64aad4a4e144878400649e71f42105311be7ed87-0"
  "quay.io/biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0"
  "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
  "quay.io/biocontainers/pandas:1.5.2"
  "quay.io/biocontainers/perl:5.26.2"
  "quay.io/biocontainers/picard:3.1.0--hdfd78af_0"
  "quay.io/biocontainers/pigz:2.3.4"
  "quay.io/biocontainers/preseq:3.2.0--hdcf5f25_6"
  "quay.io/biocontainers/python:3.9--1"
  "quay.io/biocontainers/rseqc:5.0.3--py39hf95cd2a_0"
  "quay.io/biocontainers/samtools:1.17--h00cdaf9_0"
  "quay.io/biocontainers/samtools:1.21--h50ea8bc_0"
  "quay.io/biocontainers/seqtk:1.4--he4a0461_1"
  "quay.io/biocontainers/stringtie:2.2.1--hecb563c_2"
  "quay.io/biocontainers/ucsc-gtftogenepred:377--ha8a8165_5"
)

# other quay.io containers (outside the biocontainers namespace)
CONTAINERS+=(
  "quay.io/tempralla/viptool:latest"
)

# nextflow container (docker.io - not available on quay.io)
CONTAINERS+=(
  "docker.io/nextflow/nextflow:25.04.2"
)
# Create ECR repositories
# Ensures an ECR repository exists for every image in CONTAINERS.
# Globals read: CONTAINERS, DRY_RUN, AWS_REGION
# In dry run mode the repository names are only logged. Otherwise each
# repository is looked up and created when the lookup fails. A failed
# creation is logged together with the AWS CLI output and ends the script
# with status 1.
create_ecr_repositories() {
  # Declared local so the loop variables do not leak into the global scope.
  local container repo_name create_output

  log_info "Creating ECR repositories..."

  for container in "${CONTAINERS[@]}"; do
    # Extract repository name from container image
    # (e.g., quay.io/nf-core/ctatsplicing:0.0.3 -> nf-core/ctatsplicing):
    # drop a leading quay.io/ or docker.io/ registry, then everything from
    # the first ':' onwards.
    repo_name=${container#quay.io/}
    repo_name=${repo_name#docker.io/}
    repo_name=${repo_name%%:*}

    if [[ "$DRY_RUN" == "true" ]]; then
      log_info "DRY RUN: Would create ECR repository: $repo_name"
      continue
    fi

    # Check if repository exists
    if aws ecr describe-repositories --repository-names "$repo_name" --region "$AWS_REGION" &> /dev/null; then
      log_info "ECR repository $repo_name already exists"
      continue
    fi

    log_info "Creating ECR repository: $repo_name"
    # Capture the CLI output instead of discarding it, so the reason for a
    # failure (permissions, quota, invalid name, ...) can be reported.
    if create_output=$(aws ecr create-repository --repository-name "$repo_name" --region "$AWS_REGION" 2>&1); then
      log_success "Created ECR repository: $repo_name"
    else
      log_error "Failed to create ECR repository: $repo_name"
      if [[ -n "$create_output" ]]; then
        log_error "$create_output"
      fi
      exit 1
    fi
  done
}
# Test ECR connectivity
# Issues a minimal `aws ecr describe-repositories` request (one item) against
# AWS_REGION. Logs success when the call works; otherwise logs the failure
# and exits the script with status 1. The probe runs in dry run mode as well.
test_ecr_connectivity() {
  log_info "Testing ECR connectivity..."

  if ! aws ecr describe-repositories --region "$AWS_REGION" --max-items 1 > /dev/null 2>&1; then
    log_error "ECR connectivity test failed"
    log_info "Please check your AWS credentials and ECR permissions"
    exit 1
  fi

  log_success "ECR connectivity test passed"
}
# Main migration function
# Migrates every image in CONTAINERS to "$ECR_REGISTRY/<repo>:<tag>" by
# calling migrate_container, printing a progress line after each image.
# Globals read: CONTAINERS, ECR_REGISTRY
# Returns: 0 when every image was migrated; 1 when at least one failed, so
# the caller (and the script's exit status) reflects a partial failure.
migrate_containers() {
  local container repo_name tag target_image
  local success_count=0
  local total_count=${#CONTAINERS[@]}

  log_info "Starting container migration..."

  # Test ECR connectivity first
  test_ecr_connectivity

  for container in "${CONTAINERS[@]}"; do
    # Extract repository name and tag
    # (e.g., quay.io/nf-core/ctatsplicing:0.0.3 -> nf-core/ctatsplicing and 0.0.3).
    # The repository is the reference without a leading quay.io/ or
    # docker.io/ registry and without everything from the first ':' on;
    # the tag is everything after the first ':'.
    repo_name=${container#quay.io/}
    repo_name=${repo_name#docker.io/}
    repo_name=${repo_name%%:*}
    tag=${container#*:}
    target_image="$ECR_REGISTRY/$repo_name:$tag"

    # A failed image is logged and counted, but does not stop the loop.
    if migrate_container "$container" "$target_image"; then
      success_count=$((success_count + 1))
    else
      log_error "Failed to migrate $container"
    fi

    echo "Progress: $success_count/$total_count containers migrated"
    echo "----------------------------------------"
  done

  if ((success_count < total_count)); then
    log_error "Migration completed with failures: $success_count/$total_count containers successfully migrated"
    return 1
  fi

  log_success "Migration completed: $success_count/$total_count containers successfully migrated"
}
# Usage information
# Prints the help text to stdout. The here-document delimiter is unquoted on
# purpose so that $0 expands to the name the script was invoked with.
# The permission list includes ecr:DescribeRegistry and
# ecr:DescribeRepositories because the script calls
# `aws ecr describe-registry` and `aws ecr describe-repositories`.
show_usage() {
  cat << EOF
RNAFusion Container Migration Script
This script migrates all containers used in the rnafusion pipeline from quay.io
and docker.io to AWS ECR.
Usage:
$0 [OPTIONS]
Environment Variables:
ECR_REGISTRY ECR registry URL (required)
AWS_REGION AWS region (default: us-west-2)
DRY_RUN Set to 'true' for dry run (default: false)
Options:
-h, --help Show this help message
--dry-run Perform a dry run without actually migrating containers
--create-repos Only create ECR repositories (don't migrate containers)
Examples:
# Set ECR registry and run migration
export ECR_REGISTRY="123456789012.dkr.ecr.us-east-1.amazonaws.com"
$0
# Dry run to see what would be migrated
$0 --dry-run
# Only create ECR repositories
$0 --create-repos
Prerequisites:
- Docker installed and running
- AWS CLI configured with appropriate permissions
- ECR_REGISTRY environment variable set
Required AWS Permissions:
- ecr:DescribeRegistry
- ecr:DescribeRepositories
- ecr:CreateRepository
- ecr:GetAuthorizationToken
- ecr:BatchCheckLayerAvailability
- ecr:GetDownloadUrlForLayer
- ecr:BatchGetImage
- ecr:InitiateLayerUpload
- ecr:UploadLayerPart
- ecr:CompleteLayerUpload
- ecr:PutImage
EOF
}
# Main script logic
# Parses every command-line argument (not just the first), then runs the
# workflow: prerequisites check, ECR login, repository creation and - unless
# --create-repos was given - the image migration.
# Options:
#   -h, --help      print usage and exit 0
#   --dry-run       set DRY_RUN to "true"
#   --create-repos  stop after the repositories have been created
# An unknown option logs an error, prints usage and exits 1. An empty
# argument is ignored.
main() {
  local create_repos_only="false"

  while (($# > 0)); do
    case "$1" in
      -h | --help)
        show_usage
        exit 0
        ;;
      --dry-run)
        DRY_RUN="true"
        log_info "Running in DRY RUN mode"
        ;;
      --create-repos)
        create_repos_only="true"
        ;;
      "")
        # Nothing to do for an empty argument
        ;;
      *)
        log_error "Unknown option: $1"
        show_usage
        exit 1
        ;;
    esac
    shift
  done

  check_prerequisites
  # ecr_login runs in --create-repos mode too: it is the step that aligns
  # AWS_REGION with the region embedded in ECR_REGISTRY, and without it the
  # repositories could be created in a different region than the registry.
  ecr_login
  create_ecr_repositories

  if [[ "$create_repos_only" == "true" ]]; then
    exit 0
  fi

  migrate_containers

  log_success "Migration script completed successfully!"
  log_info "Next steps:"
  log_info "1. Update your Nextflow configuration to use ECR containers"
  log_info "2. Test the pipeline with the new ECR containers"
}
# Entry point: hand every command-line argument, unmodified, to main.
main "$@"