Sometimes you want to purge a Glue table from S3, deleting all of its files and every version of them. Lately I have had to do this often, so I wrote this small script to automate it.
It shows a list of the Glue tables in the current AWS account; selecting a table by its number deletes the table's files from S3 and then removes the table from Glue.
import boto3
import argparse
import awswrangler as wr
from pprint import pprint
def get_glue_tables():
    """Return the Glue tables that have an S3 storage location.

    Iterates over ``wr.catalog.get_tables()`` and builds one dict per table.
    Tables whose storage descriptor has no location, or whose location does
    not start with ``s3://``, are skipped: there is nothing in S3 to purge
    for them, and blindly slicing the first five characters off such a
    location would produce a bogus bucket/prefix.

    Returns:
        A list of dicts with the keys ``"name"``, ``"database"`` and
        ``"path"``; ``"path"`` is the location without the ``s3://`` prefix
        (i.e. ``bucket/key-prefix``).
    """
    s3_scheme = "s3://"
    tables = []
    for table in wr.catalog.get_tables():
        # StorageDescriptor / Location may be missing or empty; treat both
        # cases the same way instead of raising KeyError.
        location = (table.get("StorageDescriptor") or {}).get("Location") or ""
        if not location.startswith(s3_scheme):
            continue
        tables.append(
            {
                "name": table["Name"],
                "database": table["DatabaseName"],
                "path": location[len(s3_scheme):],
            }
        )
    return tables
def display_menu(options):
    """Print a numbered menu of tables and return the user's selection.

    Entries are numbered starting at 1; ``0`` means "exit". The prompt is
    repeated until a valid number is entered.

    Args:
        options: Sequence of dicts with the keys ``"database"``, ``"name"``
            and ``"path"``.

    Returns:
        ``0`` if the user chose to exit, otherwise the 1-based number of the
        selected entry (so the entry itself is ``options[result - 1]``).
    """
    print("Select a table:")
    for i, option in enumerate(options, 1):
        print(f"{i}. {option['database']:30} {option['name']:10} {option['path']}")
    print("0. Exit")
    while True:
        choice = input("Enter the number of your choice: ").strip()
        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # actually parse, so e.g. a superscript "2" cannot crash the prompt.
        if not choice.isdecimal():
            print("Invalid input. Please enter a valid number.")
            continue
        number = int(choice)
        # The menu is 1-based, so len(options) itself is a valid choice.
        if 0 <= number <= len(options):
            return number
        print("Invalid choice. Please enter a valid number.")
def display_sure():
    """Ask the user to confirm the deletion.

    Keeps prompting until the answer, after trimming whitespace and
    lower-casing, is exactly ``yes`` or ``no``.

    Returns:
        ``True`` for ``yes``, ``False`` for ``no``.
    """
    answers = {"yes": True, "no": False}
    while True:
        reply = input("Sure to delete? (yes/no): ")
        normalized = reply.strip().lower()
        if normalized in answers:
            return answers[normalized]
        print("Invalid input. Please enter 'yes' or 'no.")
def bucket_action(table, delete=False):
    """List or permanently delete everything stored under a table's S3 path.

    With ``delete=False`` every object version and delete marker under the
    table's prefix is printed (a dry run). With ``delete=True`` each of them
    is deleted by version id, and afterwards the table is removed from the
    Glue catalog.

    Args:
        table: Dict with the keys ``"path"`` (``bucket/key-prefix``, without
            the ``s3://`` scheme), ``"database"`` and ``"name"``.
        delete: When true, delete instead of just listing.
    """
    # partition() also copes with a path that is only a bucket name, where
    # split("/", 1) would fail to unpack.
    bucket_name, _, prefix = table["path"].partition("/")
    # Without a trailing slash the prefix "db/sales" would also match
    # "db/sales_backup/..." and purge a sibling table's data.
    # NOTE: an empty prefix (table located at the bucket root) still matches
    # the whole bucket.
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    s3 = boto3.client("s3")

    def process_object(entry, label="Object"):
        # Either delete this specific version or just report it.
        if delete:
            s3.delete_object(
                Bucket=bucket_name,
                Key=entry["Key"],
                VersionId=entry["VersionId"],
            )
            print(".", end="", flush=True)
        else:
            print(f"{label} key: {entry['Key']}, Version ID: {entry['VersionId']}")

    # The paginator follows the key/version-id markers across truncated
    # responses for us.
    paginator = s3.get_paginator("list_object_versions")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        for version in page.get("Versions", []):
            process_object(version)
        # Delete markers are listed separately from versions; they have to be
        # removed as well for the prefix to be truly empty.
        for marker in page.get("DeleteMarkers", []):
            process_object(marker, label="Delete marker")

    if delete:
        print()  # terminate the line of progress dots
        print("Deleting glue table")
        wr.catalog.delete_table_if_exists(
            database=table["database"], table=table["name"]
        )
# Interactive main loop: re-read the catalog on every round so that tables
# deleted in a previous round no longer show up in the menu.
while True:
    tables = get_glue_tables()
    option = display_menu(tables)
    if option == 0:
        # "0. Exit" was chosen.
        break

    # Menu numbers are 1-based, list indices 0-based.
    selected_table = tables[option - 1]

    # First pass only lists what would be removed.
    bucket_action(selected_table)
    if not display_sure():
        continue

    # Confirmed: second pass actually deletes the objects and the Glue table.
    bucket_action(selected_table, delete=True)
Top comments (0)