diff --git a/.github/workflows/awscleanup.yaml b/.github/workflows/awscleanup.yaml
new file mode 100644
index 00000000..b29df852
--- /dev/null
+++ b/.github/workflows/awscleanup.yaml
@@ -0,0 +1,77 @@
+name: Daily AWS Cleanup Bot
+
+on:
+  schedule:
+    - cron: '0 8 * * *'
+
+jobs:
+  cleanup:
+    runs-on: linux-amd64-cpu4
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up AWS CLI
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-west-1
+
+      - name: Identify resources for deletion
+        id: identify-resources
+        run: |
+          # Find VPCs whose Name tag starts with "ci". Text output separates
+          # ids with tabs/newlines; collapse them so the value fits on the
+          # single line that a name=value entry in GITHUB_ENV requires.
+          vpcs=$(aws ec2 describe-vpcs \
+            --filters "Name=tag:Name,Values=ci*" \
+            --query "Vpcs[].VpcId" \
+            --output text | tr -s '[:space:]' ' ')
+          vpcs="${vpcs# }"
+          vpcs="${vpcs% }"
+          echo "Found VPCs: $vpcs"
+          echo "vpcs=$vpcs" >> "$GITHUB_ENV"
+
+      - name: Clean up VPCs
+        if: env.vpcs != ''
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Print the value of tag $2 on resource $1; the CLI prints the
+          # literal text "None" when the tag does not exist.
+          get_tag_value() {
+            local vpc_id=$1
+            local key=$2
+            aws ec2 describe-tags \
+              --filters "Name=resource-id,Values=$vpc_id" "Name=key,Values=$key" \
+              --query "Tags[0].Value" --output text
+          }
+          for vpc in $vpcs; do
+            github_repository=$(get_tag_value "$vpc" "GitHubRepository")
+            run_id=$(get_tag_value "$vpc" "GitHubRunId")
+            job_name=$(get_tag_value "$vpc" "GitHubJob")
+            response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+              "https://api.github.com/repos/${github_repository}/actions/runs/${run_id}/jobs")
+            if [[ -z "$response" || "$response" == "null" ]]; then
+              continue
+            fi
+            # The job name is handed to jq as data (--arg) and matched as a
+            # literal prefix, so quotes or regex metacharacters in the tag
+            # value cannot break or alter the filter.
+            status=$(echo "$response" | jq -r --arg job "$job_name" \
+              '[.jobs[]? | select(.name | startswith($job))][0].status // "null"' \
+              2>/dev/null || echo "null")
+            # Only clean up when the owning job is known and no longer
+            # queued or running.
+            if [[ -n "$status" && "$status" != "null" && \
+                  "$status" != "queued" && "$status" != "in_progress" ]]; then
+              echo "Holodeck e2e Job status is not in running stage , Delete the dependent resources"
+              scripts/awscleanup.sh "$vpc"
+            fi
+          done
+
+      - name: Post cleanup
+        run: |
+          echo "Cleanup completed."
diff --git a/scripts/awscleanup.sh b/scripts/awscleanup.sh
new file mode 100755
index 00000000..27e809c8
--- /dev/null
+++ b/scripts/awscleanup.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+#
+# Delete a VPC together with the resources that block its deletion.
+#
+# Usage: awscleanup.sh VPC_ID
+#
+# Cleanup is best-effort: a failing AWS call is reported by the CLI and the
+# script carries on with the next resource. Once a VPC id has been supplied
+# the script finishes with status 0, so a caller looping over several VPCs is
+# not interrupted by one VPC that cannot be removed.
+
+if [[ $# -ne 1 ]]; then
+  echo " vpcid required for deletion"
+  exit 1
+fi
+export vpc=$1
+
+# Run an aws listing command with text output and print the returned ids on
+# one line separated by single spaces (text output mixes tabs and newlines).
+list_ids() {
+  "$@" --output text | tr -s '[:space:]' ' '
+}
+
+echo "Start Deleting VPC: $vpc resource"
+
+# Remember the Elastic IP allocations used inside the VPC before anything is
+# deleted. describe-addresses does not report a VPC id, so the link to the VPC
+# is taken from the network interfaces the addresses are associated with, and
+# those interfaces disappear together with their instance or NAT gateway.
+eips=$(list_ids aws ec2 describe-network-interfaces \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query "NetworkInterfaces[].Association.AllocationId")
+
+# Delete Instance
+# Termination is asynchronous; wait for it to finish because running instances
+# keep their network interfaces, security groups and subnets in use.
+instances=$(list_ids aws ec2 describe-instances \
+  --filters "Name=vpc-id,Values=$vpc" \
+  "Name=instance-state-name,Values=pending,running,stopping,stopped,shutting-down" \
+  --query "Reservations[].Instances[].InstanceId")
+for instance in $instances; do
+  echo "Terminating instance: $instance"
+  aws ec2 terminate-instances --instance-ids "$instance"
+done
+if [[ -n "${instances// /}" ]]; then
+  # shellcheck disable=SC2086 # the id list is split into words on purpose
+  aws ec2 wait instance-terminated --instance-ids $instances
+fi
+
+# Delete NAT Gateways
+# Done before the internet gateway: a gateway cannot be detached while the VPC
+# still has mapped public addresses. Deletion is asynchronous, so wait for it.
+nat_gateways=$(list_ids aws ec2 describe-nat-gateways \
+  --filter "Name=vpc-id,Values=$vpc" "Name=state,Values=pending,available,deleting" \
+  --query "NatGateways[].NatGatewayId")
+for ngw in $nat_gateways; do
+  aws ec2 delete-nat-gateway --nat-gateway-id "$ngw"
+done
+if [[ -n "${nat_gateways// /}" ]]; then
+  # shellcheck disable=SC2086 # the id list is split into words on purpose
+  aws ec2 wait nat-gateway-deleted --nat-gateway-ids $nat_gateways
+fi
+
+# Delete Elastic IPs
+for eip in $eips; do
+  # Ignore anything that is not an allocation id (e.g. a literal "None").
+  [[ "$eip" == eipalloc-* ]] || continue
+  aws ec2 release-address --allocation-id "$eip"
+done
+
+# Delete Internet Gateway
+internet_gateways=$(list_ids aws ec2 describe-internet-gateways \
+  --filters "Name=attachment.vpc-id,Values=$vpc" \
+  --query "InternetGateways[].InternetGatewayId")
+for igw in $internet_gateways; do
+  aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpc"
+  aws ec2 delete-internet-gateway --internet-gateway-id "$igw"
+done
+
+# Detach and Delete Security Groups
+# Interfaces still using a group are moved to this VPC's default group first.
+default_sg=$(aws ec2 describe-security-groups \
+  --filters "Name=vpc-id,Values=$vpc" "Name=group-name,Values=default" \
+  --query "SecurityGroups[0].GroupId" --output text)
+security_groups=$(list_ids aws ec2 describe-security-groups \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query "SecurityGroups[?GroupName!='default'].GroupId")
+for sg in $security_groups; do
+  enis=$(list_ids aws ec2 describe-network-interfaces \
+    --filters "Name=group-id,Values=$sg" \
+    --query "NetworkInterfaces[].NetworkInterfaceId")
+  for eni in $enis; do
+    aws ec2 modify-network-interface-attribute \
+      --network-interface-id "$eni" \
+      --groups "$default_sg"
+  done
+  aws ec2 delete-security-group --group-id "$sg"
+done
+
+# Delete Route Tables
+# The main route table cannot be deleted directly; it goes away with the VPC.
+# Every other table is disassociated and then deleted.
+main_rt=$(aws ec2 describe-route-tables \
+  --filters "Name=vpc-id,Values=$vpc" "Name=association.main,Values=true" \
+  --query "RouteTables[0].RouteTableId" --output text)
+route_tables=$(list_ids aws ec2 describe-route-tables \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query "RouteTables[].RouteTableId")
+for rt in $route_tables; do
+  if [[ "$rt" == "$main_rt" ]]; then
+    continue
+  fi
+  associations=$(list_ids aws ec2 describe-route-tables \
+    --route-table-ids "$rt" \
+    --query "RouteTables[].Associations[].RouteTableAssociationId")
+  for assoc_id in $associations; do
+    aws ec2 disassociate-route-table --association-id "$assoc_id"
+  done
+  aws ec2 delete-route-table --route-table-id "$rt"
+done
+
+# Delete Network Interfaces
+# Done before the subnets: a subnet that still holds an interface cannot be
+# deleted.
+eni_ids=$(list_ids aws ec2 describe-network-interfaces \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query "NetworkInterfaces[].NetworkInterfaceId")
+for eni in $eni_ids; do
+  aws ec2 delete-network-interface --network-interface-id "$eni"
+done
+
+# Delete Subnets
+subnets=$(list_ids aws ec2 describe-subnets \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query "Subnets[].SubnetId")
+for subnet in $subnets; do
+  aws ec2 delete-subnet --subnet-id "$subnet"
+done
+
+# Delete Network ACLs
+# In the query, false must be a backtick literal; a bare word would be read as
+# a field name and the filter would never match.
+nw_acls=$(list_ids aws ec2 describe-network-acls \
+  --filters "Name=vpc-id,Values=$vpc" \
+  --query 'NetworkAcls[?IsDefault==`false`].NetworkAclId')
+for acl in $nw_acls; do
+  echo "Deleting Network ACL: $acl"
+  aws ec2 delete-network-acl --network-acl-id "$acl"
+done
+
+# Delete vpc
+# try 3 times with 30 seconds interval
+max_attempts=3
+deleted=false
+echo "All resource Deleted for VPC: $vpc , now delete vpc"
+for attempt in $(seq 1 "$max_attempts"); do
+  echo "Attempting to delete VPC: $vpc (Attempt $attempt)"
+  if aws ec2 delete-vpc --vpc-id "$vpc"; then
+    echo "Successfully deleted VPC: $vpc"
+    deleted=true
+    break
+  fi
+  if [[ $attempt -lt $max_attempts ]]; then
+    echo "Failed to delete VPC: $vpc. Retrying in 30 seconds..."
+    sleep 30
+  fi
+done
+if [[ "$deleted" != true ]]; then
+  echo "Failed to delete VPC: $vpc after $max_attempts attempts. Continue the loop to delete other vpc"
+fi