From c0b694accf9dce2730631d21c381357ec2d94c97 Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Mon, 15 Dec 2025 11:06:53 +0000
Subject: [PATCH 1/3] Implement rate limit handling and improve multi-threaded
 performance

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 293 ++++++++++++-------
 1 file changed, 186 insertions(+), 107 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index cabecd3..767d8e2 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -8,6 +8,7 @@
 import urllib.error
 from urllib.parse import urlencode
 import concurrent.futures
+import time
 
 # Try to import rich
 try:
@@ -15,6 +16,7 @@
     from rich.table import Table
     from rich import box
     from rich.text import Text
+    from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn
 except ImportError:
     print("Error: This script requires the 'rich' library.")
     print("Please install it using: pip install rich")
@@ -32,7 +34,7 @@
 # --- Helper Functions ---
 
 def make_request(url, headers=None, method='GET', data=None):
-    """Performs an HTTP request and returns parsed JSON."""
+    """Performs an HTTP request and returns parsed JSON. Handles rate limiting."""
     if headers is None:
         headers = {}
 
@@ -42,14 +44,48 @@ def make_request(url, headers=None, method='GET', data=None):
     if data:
         req.data = data.encode('utf-8')
 
-    try:
-        with urllib.request.urlopen(req) as response:
-            return json.loads(response.read().decode('utf-8'))
-    except urllib.error.HTTPError as e:
-        return None
-    except Exception as e:
-        # Avoid printing to stderr in threads to prevent garbled output
-        return None
+    max_retries = 5
+    for attempt in range(max_retries):
+        try:
+            with urllib.request.urlopen(req) as response:
+                # Proactive rate-limit handling via response headers
+                # https://docs.cloudsmith.com/api/rate-limits#monitoring-your-usage
+                remaining = response.headers.get('X-RateLimit-Remaining')
+                if remaining is not None and int(remaining) < 3:
+                    reset = response.headers.get('X-RateLimit-Reset')
+                    if reset:
+                        wait = float(reset) - time.time()
+                        if wait > 0 and wait < 30:  # Only sleep if the wait is reasonable
+                            time.sleep(wait + 0.5)
+
+                if method == 'DELETE':
+                    return True
+                return json.loads(response.read().decode('utf-8'))
+        except urllib.error.HTTPError as e:
+            if e.code == 429:
+                # Rate limited - wait and retry
+                retry_after = e.headers.get('Retry-After')
+                if retry_after:
+                    wait_time = float(retry_after)
+                else:
+                    # Fall back to X-RateLimit-Reset (a Unix timestamp)
+                    reset = e.headers.get('X-RateLimit-Reset')
+                    if reset:
+                        wait_time = float(reset) - time.time()
+                    else:
+                        wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s, ...
+
+                if wait_time < 0: wait_time = 1  # Guard against a stale reset timestamp
+                time.sleep(wait_time + 0.5)
+                continue
+            elif e.code == 404:
+                return None  # Not found
+            else:
+                return None  # Other HTTP errors are treated as missing data
+        except Exception:
+            return None  # Stay silent in worker threads to avoid garbled output
+
+    return None
 
 def find_key_recursive(obj, key):
     """Recursively searches for a key in a dictionary/list and returns a list of values."""
@@ -257,7 +293,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False):
     return rows, slug
 
 def get_untagged_images(workspace, repo, img, delete=False, detailed=False):
-    console.print("[bold]Searching for untagged manifest lists...[/bold]")
+    # console.print("[bold]Searching for untagged manifest lists...[/bold]")  # Removed print
     api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/"
    query = urlencode({'query': f"name:{img}"})
     full_url = f"{api_url}?{query}"
@@ -273,57 +309,130 @@ def get_untagged_images(workspace, repo, img, 
delete=False, detailed=False): untagged_pkgs.append(p) if not untagged_pkgs: - console.print("[yellow]No untagged manifest lists found.[/yellow]") - return + # console.print("[yellow]No untagged manifest lists found.[/yellow]") # Removed print + return None + + # Fetch data first + results_map = {} + packages_to_delete = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)} + for future in concurrent.futures.as_completed(futures): + index = futures[future] + try: + rows, slug = future.result() + results_map[index] = (rows, slug) + packages_to_delete.append(slug) + except Exception: + pass + + # Perform Deletion if requested + deleted_slugs = set() + if delete and packages_to_delete: + batch_size = 10 + def delete_pkg_task(slug): + del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" + return slug, make_request(del_url, method='DELETE') + + for i in range(0, len(packages_to_delete), batch_size): + batch = packages_to_delete[i:i + batch_size] + with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: + futures = [executor.submit(delete_pkg_task, slug) for slug in batch] + for future in concurrent.futures.as_completed(futures): + slug, success = future.result() + if success: + deleted_slugs.add(slug) + + if i + batch_size < len(packages_to_delete): + time.sleep(1.1) - # Create Table - table = Table(title="Untagged Manifest Lists", box=box.ROUNDED) + # Build Table + table = Table(title=f"Untagged Manifest Lists: {img}", box=box.ROUNDED) table.add_column("Tag", style="cyan") table.add_column("Type", style="magenta") table.add_column("Platform") table.add_column("Status") table.add_column("Downloads", justify="right") table.add_column("Digest", style="dim") + if delete: + table.add_column("Action", style="bold red") - packages_to_delete = [] - - with console.status("[bold green]Fetching untagged data...[/bold green]"): - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - # Submit all tasks - futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)} + for i in range(len(untagged_pkgs)): + if i in results_map: + rows, slug = results_map[i] - results = {} - for future in concurrent.futures.as_completed(futures): - index = futures[future] - try: - results[index] = future.result() - except Exception as e: - console.print(f"[red]Error processing untagged image: {e}[/red]") + action_str = "" + if delete: + if slug in deleted_slugs: + action_str = "Deleted" + else: + action_str = "Failed" + + for row in rows: + if row == "SECTION": + table.add_section() + else: + if delete: + table.add_row(*row, action_str) + else: + table.add_row(*row) + + return table + +def get_image_analysis(workspace, repo, img_name, detailed=False): + tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list" + tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + + tags = [] + if tags_json: + raw_tags = find_key_recursive(tags_json, 'tags') + flat_tags = [] + for item in raw_tags: + if isinstance(item, list): + flat_tags.extend(item) + else: + flat_tags.append(item) + + tags = sorted(list(set(flat_tags))) - # Add to table in original order - for i in range(len(untagged_pkgs)): - if i in results: - rows, slug = results[i] - packages_to_delete.append(slug) - 
for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) + if not tags: + return None - console.print(table) + table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) + table.add_column("Tag", style="cyan") + table.add_column("Type", style="magenta") + table.add_column("Platform") + table.add_column("Status") + table.add_column("Downloads", justify="right") + table.add_column("Digest", style="dim") - if delete: - console.print("\n[bold red]Deleting untagged packages...[/bold red]") - for slug in packages_to_delete: - console.print(f" Deleting package: {slug}...", end=" ") - del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" - req = urllib.request.Request(del_url, headers=AUTH_HEADER, method='DELETE') + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags} + + results = {} + for future in concurrent.futures.as_completed(future_to_tag): + tag = future_to_tag[future] try: - with urllib.request.urlopen(req): - console.print("[green]Deleted.[/green]") - except Exception as e: - console.print(f"[red]Failed: {e}[/red]") + results[tag] = future.result() + except Exception: + pass + + for t in tags: + if t in results: + rows = results[t] + for row in rows: + if row == "SECTION": + table.add_section() + else: + table.add_row(*row) + return table + +def process_image(org, repo, img_name, args): + if args.untagged or args.untagged_delete: + return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed) + else: + return get_image_analysis(org, repo, img_name, detailed=args.detailed) def main(): parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector") @@ -351,68 +460,38 @@ def main(): console.print("[red]Failed to fetch catalog or no images found.[/red]") sys.exit(1) - for img_name in images_to_scan: - console.print(f"\nDocker Image: [bold blue]{args.org}/{args.repo}/{img_name}[/bold blue]") - - if args.untagged or args.untagged_delete: - get_untagged_images(args.org, args.repo, img_name, delete=args.untagged_delete, detailed=args.detailed) - else: - # Get Tags - tags_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/{img_name}/tags/list" - tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + console=console + ) as progress: + task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan)) + + # Use a reasonable number of workers for images (e.g., 5) + # Each image might spawn its own threads for tags/digests + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + future_to_img = { + executor.submit(process_image, args.org, args.repo, img, args): img + for img in images_to_scan + } - tags = [] - if tags_json: - raw_tags = find_key_recursive(tags_json, 'tags') - flat_tags = [] - for item in raw_tags: - if isinstance(item, list): - flat_tags.extend(item) + for future in concurrent.futures.as_completed(future_to_img): + img_name = future_to_img[future] + try: + table = future.result() + if table: + # Print the table to the console (thread-safe via rich) + progress.console.print(table) + progress.console.print("") # Newline else: - flat_tags.append(item) + # 
Optional: log empty/no tags + pass + except Exception as e: + progress.console.print(f"[red]Error processing {img_name}: {e}[/red]") - tags = sorted(list(set(flat_tags))) - - if not tags: - console.print(f"[yellow]No tags found for {img_name}.[/yellow]") - continue - - console.print(f"Found matching tags: [bold]{len(tags)}[/bold]") - - # Create Main Table - table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - - with console.status(f"[bold green]Fetching data for {img_name}...[/bold green]"): - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - # Submit all tasks - future_to_tag = {executor.submit(fetch_tag_data, args.org, args.repo, img_name, t, args.detailed): t for t in tags} - - results = {} - for future in concurrent.futures.as_completed(future_to_tag): - tag = future_to_tag[future] - try: - results[tag] = future.result() - except Exception as exc: - console.print(f"[red]Tag {tag} generated an exception: {exc}[/red]") - - # Add to table in sorted order - for t in tags: - if t in results: - rows = results[t] - for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) - - # Print the final table - console.print(table) + progress.advance(task) if __name__ == "__main__": main() \ No newline at end of file From da132e521a3be3b385b637e7d2bdb4055049c25a Mon Sep 17 00:00:00 2001 From: colinmoynes Date: Mon, 15 Dec 2025 13:31:46 +0000 Subject: [PATCH 2/3] error handling and minor changes --- Docker/Cloudsmith Docker Sleuth/multiarch.py | 56 +++++++++++++++++--- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py index 767d8e2..03a227c 100755 --- a/Docker/Cloudsmith Docker Sleuth/multiarch.py +++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py @@ -228,7 +228,7 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): "[magenta]manifest/list[/magenta]", "multi", status_display, - str(total_downloads), + f"[green]{total_downloads}[/green]", f"[dim]{index_digest}[/dim]" ]) @@ -280,7 +280,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): "manifest/list", platform_str, status_display, - str(downloads), + f"[green]{downloads}[/green]", digest ]) @@ -435,6 +435,22 @@ def process_image(org, repo, img_name, args): return get_image_analysis(org, repo, img_name, detailed=args.detailed) def main(): + console.print(r"""[bold cyan] +██████╗██╗ ██████╗ ██╗ ██╗██████╗ ███████╗███╗ ███╗██╗████████╗██╗ ██╗ +██╔════╝██║ ██╔═══██╗██║ ██║██╔══██╗██╔════╝████╗ ████║██║╚══██╔══╝██║ ██║ +██║ ██║ ██║ ██║██║ ██║██║ ██║███████╗██╔████╔██║██║ ██║ ███████║ +██║ ██║ ██║ ██║██║ ██║██║ ██║╚════██║██║╚██╔╝██║██║ ██║ ██╔══██║ +╚██████╗███████╗╚██████╔╝╚██████╔╝██████╔╝███████║██║ ╚═╝ ██║██║ ██║ ██║ ██║ + ╚═════╝╚══════╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝ + +██████╗ ██████╗ ██████╗██╗ ██╗███████╗██████╗ ███████╗██╗ ███████╗██╗ ██╗████████╗██╗ ██╗ +██╔══██╗██╔═══██╗██╔════╝██║ ██╔╝██╔════╝██╔══██╗ ██╔════╝██║ ██╔════╝██║ ██║╚══██╔══╝██║ ██║ +██║ ██║██║ ██║██║ █████╔╝ █████╗ ██████╔╝ ███████╗██║ █████╗ ██║ ██║ ██║ ███████║ +██║ ██║██║ ██║██║ ██╔═██╗ ██╔══╝ ██╔══██╗ ╚════██║██║ ██╔══╝ ██║ ██║ ██║ ██╔══██║ +██████╔╝╚██████╔╝╚██████╗██║ ██╗███████╗██║ ██║ 
███████║███████╗███████╗╚██████╔╝   ██║   ██║  ██║
+╚═════╝ ╚═════╝  ╚═════╝╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝ ╚══════╝╚══════╝╚══════╝ ╚═════╝    ╚═╝   ╚═╝  ╚═╝
+[/bold cyan]""")
+
     parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector")
     parser.add_argument("org", help="Cloudsmith Organization/User")
     parser.add_argument("repo", help="Cloudsmith Repository")
@@ -467,11 +483,15 @@ def main():
         TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
         console=console
     ) as progress:
-        task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan))
+        task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan))
 
+        collected_results = []
+
         # Use a reasonable number of workers for images (e.g., 5)
         # Each image might spawn its own threads for tags/digests
-        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+        # Manually manage the executor to handle KeyboardInterrupt gracefully
+        executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
+        try:
             future_to_img = {
                 executor.submit(process_image, args.org, args.repo, img, args): img
                 for img in images_to_scan
             }
@@ -482,9 +502,7 @@ def main():
             for future in concurrent.futures.as_completed(future_to_img):
                 img_name = future_to_img[future]
                 try:
                     table = future.result()
                     if table:
-                        # Print the table to the console (thread-safe via rich)
-                        progress.console.print(table)
-                        progress.console.print("") # Newline
+                        collected_results.append((img_name, table))
                     else:
                         # Optional: log empty/no tags
                         pass
@@ -492,6 +510,28 @@ def main():
                     progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
 
                 progress.advance(task)
+
+            # Normal shutdown
+            executor.shutdown(wait=True)
+
+        except KeyboardInterrupt:
+            # Force shutdown without waiting
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+    # Sort results by image name and print
+    collected_results.sort(key=lambda x: x[0])
+
+    if not collected_results:
+        console.print("[yellow]No matching images or tags found.[/yellow]")
+
+    for _, table in collected_results:
+        console.print(table)
+        console.print("")
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    try:
+        main()
+    except KeyboardInterrupt:
+        console.print("\n[bold red]Operation cancelled by user.[/bold red]")
+        # Use os._exit to avoid
\ No newline at end of file

From cb613b2890a3b2b1a81b4396e5a573e538c099e4 Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Mon, 15 Dec 2025 15:57:28 +0000
Subject: [PATCH 3/3] Add --output flag with support for JSON and CSV formats

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 320 ++++++++++++-------
 1 file changed, 199 insertions(+), 121 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index 03a227c..d9a7ba6 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -3,6 +3,7 @@
 import sys
 import os
 import json
+import csv
 import argparse
 import urllib.request
 import urllib.error
@@ -112,7 +113,7 @@ def format_status(status_str):
 # --- Core Logic ---
 
 def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknown"):
-    """Fetches data for a specific digest (child image) and returns row data."""
+    """Fetches data for a specific digest (child image) and returns a data dict."""
 
     # 1. 
Fetch Manifest to get Architecture (Only if unknown) if platform == "unknown": @@ -140,13 +141,13 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/?query=version:{version}" pkg_details = make_request(api_url, {"Cache-Control": "no-cache"}) - status_display = "" + status_raw = "Unknown" dl = 0 if pkg_details: statuses = set(find_key_recursive(pkg_details, 'status_str')) - status_parts = [format_status(s) for s in statuses] - status_display = " ".join(status_parts) + if statuses: + status_raw = " ".join(sorted(list(statuses))) downloads = find_key_recursive(pkg_details, 'downloads') if len(downloads) >= 2: @@ -154,19 +155,18 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow elif len(downloads) > 0: dl = downloads[0] - # Return tuple of (Row Columns List, Download Count) - row_data = [ - f" └─ {ntag_display}", - "image", - platform, - status_display, - str(dl), - f"[dim]{digest}[/dim]" - ] - return row_data, dl + return { + "tag": ntag_display, + "type": "image", + "platform": platform, + "status": status_raw, + "downloads": dl, + "digest": digest, + "is_child": True + } def fetch_tag_data(workspace, repo, img, ntag, detailed=False): - """Fetches the manifest list for a tag and returns rows for the table.""" + """Fetches the manifest list for a tag and returns a list of data dicts.""" manifest_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img}/manifests/{ntag}" manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) @@ -196,13 +196,13 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): return [] # Process children - children_rows = [] + children_data = [] total_downloads = 0 for child in children: - row, dl = get_digest_data(workspace, repo, img, child['digest'], ntag, platform=child['platform']) - children_rows.append(row) - total_downloads += dl + data = get_digest_data(workspace, repo, img, child['digest'], ntag, platform=child['platform']) + children_data.append(data) + total_downloads += data['downloads'] # Fetch parent package info api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/?query=version:{ntag}" @@ -219,25 +219,23 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): else: index_digest = ver - status_display = format_status(parent_status) - - rows = [] - # Parent Row - rows.append([ - f"[bold cyan]{ntag}[/bold cyan]", - "[magenta]manifest/list[/magenta]", - "multi", - status_display, - f"[green]{total_downloads}[/green]", - f"[dim]{index_digest}[/dim]" - ]) - - # Children Rows + results = [] + # Parent Data + results.append({ + "tag": ntag, + "type": "manifest/list", + "platform": "multi", + "status": parent_status, + "downloads": total_downloads, + "digest": index_digest, + "is_child": False + }) + + # Children Data if detailed: - rows.extend(children_rows) - rows.append("SECTION") + results.extend(children_data) - return rows + return results def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): digest = pkg.get('version') @@ -272,28 +270,26 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): platform_str = " ".join(sorted(list(archs))) - status_display = format_status(status) - - rows = [] - rows.append([ - "(untagged)", - "manifest/list", - platform_str, - status_display, - f"[green]{downloads}[/green]", - digest - ]) + results = [] + results.append({ + "tag": "(untagged)", + "type": 
"manifest/list", + "platform": platform_str, + "status": status, + "downloads": downloads, + "digest": digest, + "is_child": False, + "slug": slug # Internal use + }) if detailed: for child in child_digests: - row, _ = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) - rows.append(row) - rows.append("SECTION") + data = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) + results.append(data) - return rows, slug + return results, slug def get_untagged_images(workspace, repo, img, delete=False, detailed=False): - # console.print("[bold]Searching for untagged manifest lists...[/bold]") # Removed print api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/" query = urlencode({'query': f"name:{img}"}) full_url = f"{api_url}?{query}" @@ -309,7 +305,6 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): untagged_pkgs.append(p) if not untagged_pkgs: - # console.print("[yellow]No untagged manifest lists found.[/yellow]") # Removed print return None # Fetch data first @@ -329,6 +324,7 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): # Perform Deletion if requested deleted_slugs = set() + failed_slugs = set() if delete and packages_to_delete: batch_size = 10 def delete_pkg_task(slug): @@ -343,42 +339,34 @@ def delete_pkg_task(slug): slug, success = future.result() if success: deleted_slugs.add(slug) + else: + failed_slugs.add(slug) if i + batch_size < len(packages_to_delete): time.sleep(1.1) - # Build Table - table = Table(title=f"Untagged Manifest Lists: {img}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - if delete: - table.add_column("Action", style="bold red") - + # Build Result Groups + groups = [] for i in range(len(untagged_pkgs)): if i in results_map: rows, slug = results_map[i] + # Update action status action_str = "" if delete: if slug in deleted_slugs: action_str = "Deleted" - else: + elif slug in failed_slugs: action_str = "Failed" - + for row in rows: - if row == "SECTION": - table.add_section() - else: - if delete: - table.add_row(*row, action_str) - else: - table.add_row(*row) + row['action'] = action_str + # Remove internal slug + if 'slug' in row: del row['slug'] + + groups.append(rows) - return table + return groups def get_image_analysis(workspace, repo, img_name, detailed=False): tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list" @@ -399,14 +387,7 @@ def get_image_analysis(workspace, repo, img_name, detailed=False): if not tags: return None - table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - + groups = [] with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags} @@ -420,13 +401,8 @@ def get_image_analysis(workspace, repo, img_name, detailed=False): for t in tags: if t in results: - rows = results[t] - for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) - return table + 
groups.append(results[t]) + return groups def process_image(org, repo, img_name, args): if args.untagged or args.untagged_delete: @@ -434,6 +410,51 @@ def process_image(org, repo, img_name, args): else: return get_image_analysis(org, repo, img_name, detailed=args.detailed) +def render_table(image_name, groups, is_untagged=False, has_action=False): + title = f"Untagged Manifest Lists: {image_name}" if is_untagged else f"Image Analysis: {image_name}" + table = Table(title=title, box=box.ROUNDED) + table.add_column("Tag", style="cyan") + table.add_column("Type", style="magenta") + table.add_column("Platform") + table.add_column("Status") + table.add_column("Downloads", justify="right") + table.add_column("Digest", style="dim") + if has_action: + table.add_column("Action", style="bold red") + + for i, group in enumerate(groups): + if i > 0: + table.add_section() + + for row in group: + # Format for Table + tag_display = row['tag'] + if row['is_child']: + tag_display = f" └─ {row['tag']}" + else: + tag_display = f"[bold cyan]{row['tag']}[/bold cyan]" + + type_display = row['type'] + if type_display == 'manifest/list': + type_display = "[magenta]manifest/list[/magenta]" + + status_display = format_status(row['status']) + + dl_display = str(row['downloads']) + if row['type'] == 'manifest/list': + dl_display = f"[green]{dl_display}[/green]" + + digest_display = f"[dim]{row['digest']}[/dim]" + + row_data = [tag_display, type_display, row['platform'], status_display, dl_display, digest_display] + + if has_action: + row_data.append(row.get('action', '')) + + table.add_row(*row_data) + + return table + def main(): console.print(r"""[bold cyan] ██████╗██╗ ██████╗ ██╗ ██╗██████╗ ███████╗███╗ ███╗██╗████████╗██╗ ██╗ @@ -458,6 +479,7 @@ def main(): parser.add_argument("--untagged", action="store_true", help="Find untagged manifest lists") parser.add_argument("--untagged-delete", action="store_true", help="Delete untagged manifest lists") parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests") + parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)") args = parser.parse_args() @@ -466,30 +488,45 @@ def main(): if args.img: images_to_scan.append(args.img) else: - console.print(f"[bold]Fetching catalog for {args.org}/{args.repo}...[/bold]") + if args.output == 'table': + console.print(f"[bold]Fetching catalog for {args.org}/{args.repo}...[/bold]") catalog_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/_catalog" catalog_json = make_request(catalog_url, {"Accept": "application/json", "Cache-Control": "no-cache"}) if catalog_json and 'repositories' in catalog_json: images_to_scan = catalog_json['repositories'] else: - console.print("[red]Failed to fetch catalog or no images found.[/red]") + if args.output == 'table': + console.print("[red]Failed to fetch catalog or no images found.[/red]") sys.exit(1) - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - console=console - ) as progress: - task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan)) + # Only show progress bar for table output + if args.output == 'table': + progress_ctx = Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + console=console + ) + else: + # Dummy context 
manager for non-table output + class DummyProgress: + def __enter__(self): return self + def __exit__(self, *args): pass + def add_task(self, *args, **kwargs): return None + def advance(self, *args, **kwargs): pass + @property + def console(self): return console # fallback + progress_ctx = DummyProgress() + + collected_results = [] + + with progress_ctx as progress: + if args.output == 'table': + task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan)) - collected_results = [] - # Use a reasonable number of workers for images (e.g., 5) - # Each image might spawn its own threads for tags/digests - # Manually manage executor to handle KeyboardInterrupt gracefully executor = concurrent.futures.ThreadPoolExecutor(max_workers=5) try: future_to_img = { @@ -500,38 +537,79 @@ def main(): for future in concurrent.futures.as_completed(future_to_img): img_name = future_to_img[future] try: - table = future.result() - if table: - collected_results.append((img_name, table)) - else: - # Optional: log empty/no tags - pass + groups = future.result() + if groups: + collected_results.append((img_name, groups)) except Exception as e: - progress.console.print(f"[red]Error processing {img_name}: {e}[/red]") + if args.output == 'table': + progress.console.print(f"[red]Error processing {img_name}: {e}[/red]") - progress.advance(task) + if args.output == 'table': + progress.advance(task) - # Normal shutdown executor.shutdown(wait=True) except KeyboardInterrupt: - # Force shutdown without waiting executor.shutdown(wait=False, cancel_futures=True) raise - # Sort results by image name and print + # Sort results by image name collected_results.sort(key=lambda x: x[0]) if not collected_results: - console.print("[yellow]No matching images or tags found.[/yellow]") - - for _, table in collected_results: - console.print(table) - console.print("") + if args.output == 'table': + console.print("[yellow]No matching images or tags found.[/yellow]") + elif args.output == 'json': + print("[]") + return + + # --- Output Handling --- + + if args.output == 'table': + for img_name, groups in collected_results: + is_untagged = args.untagged or args.untagged_delete + has_action = args.untagged_delete + table = render_table(img_name, groups, is_untagged, has_action) + console.print(table) + console.print("") + + elif args.output == 'json': + # Flatten structure for JSON: List of objects, each with 'image' field + json_output = [] + for img_name, groups in collected_results: + for group in groups: + for row in group: + row_copy = row.copy() + row_copy['image'] = img_name + json_output.append(row_copy) + print(json.dumps(json_output, indent=2)) + + elif args.output == 'csv': + # Flatten structure for CSV + csv_rows = [] + fieldnames = ['image', 'tag', 'type', 'platform', 'status', 'downloads', 'digest', 'is_child', 'action'] + + for img_name, groups in collected_results: + for group in groups: + for row in group: + row_copy = row.copy() + row_copy['image'] = img_name + # Ensure all fields exist + for f in fieldnames: + if f not in row_copy: + row_copy[f] = '' + csv_rows.append(row_copy) + + writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(csv_rows) if __name__ == "__main__": try: main() except KeyboardInterrupt: - console.print("\n[bold red]Operation cancelled by user.[/bold red]") - # Use os._exit to avoid \ No newline at end of file + if 'console' in globals(): + console.print("\n[bold red]Operation cancelled by user.[/bold red]") + else: + 
print("\nOperation cancelled by user.") + sys.exit(1) \ No newline at end of file