From c0b694accf9dce2730631d21c381357ec2d94c97 Mon Sep 17 00:00:00 2001 From: colinmoynes Date: Mon, 15 Dec 2025 11:06:53 +0000 Subject: [PATCH 1/6] Implementing rate limit handling and improved multi threaded performance. --- Docker/Cloudsmith Docker Sleuth/multiarch.py | 293 ++++++++++++------- 1 file changed, 186 insertions(+), 107 deletions(-) diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py index cabecd3..767d8e2 100755 --- a/Docker/Cloudsmith Docker Sleuth/multiarch.py +++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py @@ -8,6 +8,7 @@ import urllib.error from urllib.parse import urlencode import concurrent.futures +import time # Try to import rich try: @@ -15,6 +16,7 @@ from rich.table import Table from rich import box from rich.text import Text + from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn except ImportError: print("Error: This script requires the 'rich' library.") print("Please install it using: pip install rich") @@ -32,7 +34,7 @@ # --- Helper Functions --- def make_request(url, headers=None, method='GET', data=None): - """Performs an HTTP request and returns parsed JSON.""" + """Performs an HTTP request and returns parsed JSON. Handles rate limiting.""" if headers is None: headers = {} @@ -42,14 +44,48 @@ def make_request(url, headers=None, method='GET', data=None): if data: req.data = data.encode('utf-8') - try: - with urllib.request.urlopen(req) as response: - return json.loads(response.read().decode('utf-8')) - except urllib.error.HTTPError as e: - return None - except Exception as e: - # Avoid printing to stderr in threads to prevent garbled output - return None + max_retries = 5 + for attempt in range(max_retries): + try: + with urllib.request.urlopen(req) as response: + # Proactive Rate Limit Handling via Headers + # https://docs.cloudsmith.com/api/rate-limits#monitoring-your-usage + remaining = response.headers.get('X-RateLimit-Remaining') + if remaining is not None and int(remaining) < 3: + reset = response.headers.get('X-RateLimit-Reset') + if reset: + wait = float(reset) - time.time() + if wait > 0 and wait < 30: # Only sleep if wait is reasonable + time.sleep(wait + 0.5) + + if method == 'DELETE': + return True + return json.loads(response.read().decode('utf-8')) + except urllib.error.HTTPError as e: + if e.code == 429: + # Rate limited - wait and retry + retry_after = e.headers.get('Retry-After') + if retry_after: + wait_time = float(retry_after) + else: + # Fallback to X-RateLimit-Reset + reset = e.headers.get('X-RateLimit-Reset') + if reset: + wait_time = float(reset) - time.time() + else: + wait_time = (2 ** attempt) + + if wait_time < 0: wait_time = 1 + time.sleep(wait_time + 0.5) + continue + elif e.code == 404: + return None + else: + return None + except Exception as e: + return None + + return None def find_key_recursive(obj, key): """Recursively searches for a key in a dictionary/list and returns a list of values.""" @@ -257,7 +293,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): return rows, slug def get_untagged_images(workspace, repo, img, delete=False, detailed=False): - console.print("[bold]Searching for untagged manifest lists...[/bold]") + # console.print("[bold]Searching for untagged manifest lists...[/bold]") # Removed print api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/" query = urlencode({'query': f"name:{img}"}) full_url = f"{api_url}?{query}" @@ -273,57 +309,130 @@ def get_untagged_images(workspace, repo, img, 
delete=False, detailed=False): untagged_pkgs.append(p) if not untagged_pkgs: - console.print("[yellow]No untagged manifest lists found.[/yellow]") - return + # console.print("[yellow]No untagged manifest lists found.[/yellow]") # Removed print + return None + + # Fetch data first + results_map = {} + packages_to_delete = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)} + for future in concurrent.futures.as_completed(futures): + index = futures[future] + try: + rows, slug = future.result() + results_map[index] = (rows, slug) + packages_to_delete.append(slug) + except Exception: + pass + + # Perform Deletion if requested + deleted_slugs = set() + if delete and packages_to_delete: + batch_size = 10 + def delete_pkg_task(slug): + del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" + return slug, make_request(del_url, method='DELETE') + + for i in range(0, len(packages_to_delete), batch_size): + batch = packages_to_delete[i:i + batch_size] + with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: + futures = [executor.submit(delete_pkg_task, slug) for slug in batch] + for future in concurrent.futures.as_completed(futures): + slug, success = future.result() + if success: + deleted_slugs.add(slug) + + if i + batch_size < len(packages_to_delete): + time.sleep(1.1) - # Create Table - table = Table(title="Untagged Manifest Lists", box=box.ROUNDED) + # Build Table + table = Table(title=f"Untagged Manifest Lists: {img}", box=box.ROUNDED) table.add_column("Tag", style="cyan") table.add_column("Type", style="magenta") table.add_column("Platform") table.add_column("Status") table.add_column("Downloads", justify="right") table.add_column("Digest", style="dim") + if delete: + table.add_column("Action", style="bold red") - packages_to_delete = [] - - with console.status("[bold green]Fetching untagged data...[/bold green]"): - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - # Submit all tasks - futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)} + for i in range(len(untagged_pkgs)): + if i in results_map: + rows, slug = results_map[i] - results = {} - for future in concurrent.futures.as_completed(futures): - index = futures[future] - try: - results[index] = future.result() - except Exception as e: - console.print(f"[red]Error processing untagged image: {e}[/red]") + action_str = "" + if delete: + if slug in deleted_slugs: + action_str = "Deleted" + else: + action_str = "Failed" + + for row in rows: + if row == "SECTION": + table.add_section() + else: + if delete: + table.add_row(*row, action_str) + else: + table.add_row(*row) + + return table + +def get_image_analysis(workspace, repo, img_name, detailed=False): + tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list" + tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + + tags = [] + if tags_json: + raw_tags = find_key_recursive(tags_json, 'tags') + flat_tags = [] + for item in raw_tags: + if isinstance(item, list): + flat_tags.extend(item) + else: + flat_tags.append(item) + + tags = sorted(list(set(flat_tags))) - # Add to table in original order - for i in range(len(untagged_pkgs)): - if i in results: - rows, slug = results[i] - packages_to_delete.append(slug) - 
for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) + if not tags: + return None - console.print(table) + table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) + table.add_column("Tag", style="cyan") + table.add_column("Type", style="magenta") + table.add_column("Platform") + table.add_column("Status") + table.add_column("Downloads", justify="right") + table.add_column("Digest", style="dim") - if delete: - console.print("\n[bold red]Deleting untagged packages...[/bold red]") - for slug in packages_to_delete: - console.print(f" Deleting package: {slug}...", end=" ") - del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" - req = urllib.request.Request(del_url, headers=AUTH_HEADER, method='DELETE') + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: + future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags} + + results = {} + for future in concurrent.futures.as_completed(future_to_tag): + tag = future_to_tag[future] try: - with urllib.request.urlopen(req): - console.print("[green]Deleted.[/green]") - except Exception as e: - console.print(f"[red]Failed: {e}[/red]") + results[tag] = future.result() + except Exception: + pass + + for t in tags: + if t in results: + rows = results[t] + for row in rows: + if row == "SECTION": + table.add_section() + else: + table.add_row(*row) + return table + +def process_image(org, repo, img_name, args): + if args.untagged or args.untagged_delete: + return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed) + else: + return get_image_analysis(org, repo, img_name, detailed=args.detailed) def main(): parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector") @@ -351,68 +460,38 @@ def main(): console.print("[red]Failed to fetch catalog or no images found.[/red]") sys.exit(1) - for img_name in images_to_scan: - console.print(f"\nDocker Image: [bold blue]{args.org}/{args.repo}/{img_name}[/bold blue]") - - if args.untagged or args.untagged_delete: - get_untagged_images(args.org, args.repo, img_name, delete=args.untagged_delete, detailed=args.detailed) - else: - # Get Tags - tags_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/{img_name}/tags/list" - tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + console=console + ) as progress: + task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan)) + + # Use a reasonable number of workers for images (e.g., 5) + # Each image might spawn its own threads for tags/digests + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + future_to_img = { + executor.submit(process_image, args.org, args.repo, img, args): img + for img in images_to_scan + } - tags = [] - if tags_json: - raw_tags = find_key_recursive(tags_json, 'tags') - flat_tags = [] - for item in raw_tags: - if isinstance(item, list): - flat_tags.extend(item) + for future in concurrent.futures.as_completed(future_to_img): + img_name = future_to_img[future] + try: + table = future.result() + if table: + # Print the table to the console (thread-safe via rich) + progress.console.print(table) + progress.console.print("") # Newline else: - flat_tags.append(item) + # 
Optional: log empty/no tags + pass + except Exception as e: + progress.console.print(f"[red]Error processing {img_name}: {e}[/red]") - tags = sorted(list(set(flat_tags))) - - if not tags: - console.print(f"[yellow]No tags found for {img_name}.[/yellow]") - continue - - console.print(f"Found matching tags: [bold]{len(tags)}[/bold]") - - # Create Main Table - table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - - with console.status(f"[bold green]Fetching data for {img_name}...[/bold green]"): - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - # Submit all tasks - future_to_tag = {executor.submit(fetch_tag_data, args.org, args.repo, img_name, t, args.detailed): t for t in tags} - - results = {} - for future in concurrent.futures.as_completed(future_to_tag): - tag = future_to_tag[future] - try: - results[tag] = future.result() - except Exception as exc: - console.print(f"[red]Tag {tag} generated an exception: {exc}[/red]") - - # Add to table in sorted order - for t in tags: - if t in results: - rows = results[t] - for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) - - # Print the final table - console.print(table) + progress.advance(task) if __name__ == "__main__": main() \ No newline at end of file From da132e521a3be3b385b637e7d2bdb4055049c25a Mon Sep 17 00:00:00 2001 From: colinmoynes Date: Mon, 15 Dec 2025 13:31:46 +0000 Subject: [PATCH 2/6] error handling and minor changes --- Docker/Cloudsmith Docker Sleuth/multiarch.py | 56 +++++++++++++++++--- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py index 767d8e2..03a227c 100755 --- a/Docker/Cloudsmith Docker Sleuth/multiarch.py +++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py @@ -228,7 +228,7 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): "[magenta]manifest/list[/magenta]", "multi", status_display, - str(total_downloads), + f"[green]{total_downloads}[/green]", f"[dim]{index_digest}[/dim]" ]) @@ -280,7 +280,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): "manifest/list", platform_str, status_display, - str(downloads), + f"[green]{downloads}[/green]", digest ]) @@ -435,6 +435,22 @@ def process_image(org, repo, img_name, args): return get_image_analysis(org, repo, img_name, detailed=args.detailed) def main(): + console.print(r"""[bold cyan] +██████╗██╗ ██████╗ ██╗ ██╗██████╗ ███████╗███╗ ███╗██╗████████╗██╗ ██╗ +██╔════╝██║ ██╔═══██╗██║ ██║██╔══██╗██╔════╝████╗ ████║██║╚══██╔══╝██║ ██║ +██║ ██║ ██║ ██║██║ ██║██║ ██║███████╗██╔████╔██║██║ ██║ ███████║ +██║ ██║ ██║ ██║██║ ██║██║ ██║╚════██║██║╚██╔╝██║██║ ██║ ██╔══██║ +╚██████╗███████╗╚██████╔╝╚██████╔╝██████╔╝███████║██║ ╚═╝ ██║██║ ██║ ██║ ██║ + ╚═════╝╚══════╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝ + +██████╗ ██████╗ ██████╗██╗ ██╗███████╗██████╗ ███████╗██╗ ███████╗██╗ ██╗████████╗██╗ ██╗ +██╔══██╗██╔═══██╗██╔════╝██║ ██╔╝██╔════╝██╔══██╗ ██╔════╝██║ ██╔════╝██║ ██║╚══██╔══╝██║ ██║ +██║ ██║██║ ██║██║ █████╔╝ █████╗ ██████╔╝ ███████╗██║ █████╗ ██║ ██║ ██║ ███████║ +██║ ██║██║ ██║██║ ██╔═██╗ ██╔══╝ ██╔══██╗ ╚════██║██║ ██╔══╝ ██║ ██║ ██║ ██╔══██║ +██████╔╝╚██████╔╝╚██████╗██║ ██╗███████╗██║ ██║ 
███████║███████╗███████╗╚██████╔╝ ██║ ██║ ██║ +╚═════╝ ╚═════╝ ╚═════╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝ ╚══════╝╚══════╝╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ +[/bold cyan]""") + parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector") parser.add_argument("org", help="Cloudsmith Organization/User") parser.add_argument("repo", help="Cloudsmith Repository") @@ -467,11 +483,15 @@ def main(): TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), console=console ) as progress: - task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan)) + task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan)) + collected_results = [] + # Use a reasonable number of workers for images (e.g., 5) # Each image might spawn its own threads for tags/digests - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + # Manually manage executor to handle KeyboardInterrupt gracefully + executor = concurrent.futures.ThreadPoolExecutor(max_workers=5) + try: future_to_img = { executor.submit(process_image, args.org, args.repo, img, args): img for img in images_to_scan @@ -482,9 +502,7 @@ def main(): try: table = future.result() if table: - # Print the table to the console (thread-safe via rich) - progress.console.print(table) - progress.console.print("") # Newline + collected_results.append((img_name, table)) else: # Optional: log empty/no tags pass @@ -492,6 +510,28 @@ def main(): progress.console.print(f"[red]Error processing {img_name}: {e}[/red]") progress.advance(task) + + # Normal shutdown + executor.shutdown(wait=True) + + except KeyboardInterrupt: + # Force shutdown without waiting + executor.shutdown(wait=False, cancel_futures=True) + raise + + # Sort results by image name and print + collected_results.sort(key=lambda x: x[0]) + + if not collected_results: + console.print("[yellow]No matching images or tags found.[/yellow]") + + for _, table in collected_results: + console.print(table) + console.print("") if __name__ == "__main__": - main() \ No newline at end of file + try: + main() + except KeyboardInterrupt: + console.print("\n[bold red]Operation cancelled by user.[/bold red]") + # Use os._exit to avoid \ No newline at end of file From 826f24ac65ae0820cfb8986cac075ea22a1e7c68 Mon Sep 17 00:00:00 2001 From: colinmoynes Date: Tue, 16 Dec 2025 17:41:38 +0000 Subject: [PATCH 3/6] enhanced deletion capabilities --- Docker/Cloudsmith Docker Sleuth/multiarch.py | 356 +++++++++++++------ 1 file changed, 248 insertions(+), 108 deletions(-) diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py index 03a227c..e6575cc 100755 --- a/Docker/Cloudsmith Docker Sleuth/multiarch.py +++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py @@ -109,10 +109,38 @@ def format_status(status_str): if status_str == "Failed": return f"[bold red]{status_str}[/bold red] ❌" return status_str +def batch_delete_packages(workspace, repo, slugs): + """Deletes a list of package slugs in batches to respect rate limits.""" + deleted = set() + failed = set() + if not slugs: + return deleted, failed + + batch_size = 10 + def delete_pkg_task(slug): + del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" + return slug, make_request(del_url, method='DELETE') + + for i in range(0, len(slugs), batch_size): + batch = slugs[i:i + batch_size] + with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: + futures = [executor.submit(delete_pkg_task, slug) for slug in batch] 
+ for future in concurrent.futures.as_completed(futures): + slug, success = future.result() + if success: + deleted.add(slug) + else: + failed.add(slug) + + if i + batch_size < len(slugs): + time.sleep(1.1) + + return deleted, failed + # --- Core Logic --- def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknown"): - """Fetches data for a specific digest (child image) and returns row data.""" + """Fetches data for a specific digest (child image) and returns data dict.""" # 1. Fetch Manifest to get Architecture (Only if unknown) if platform == "unknown": @@ -140,13 +168,13 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/?query=version:{version}" pkg_details = make_request(api_url, {"Cache-Control": "no-cache"}) - status_display = "" + status_raw = "Unknown" dl = 0 if pkg_details: statuses = set(find_key_recursive(pkg_details, 'status_str')) - status_parts = [format_status(s) for s in statuses] - status_display = " ".join(status_parts) + if statuses: + status_raw = " ".join(sorted(list(statuses))) downloads = find_key_recursive(pkg_details, 'downloads') if len(downloads) >= 2: @@ -154,19 +182,18 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow elif len(downloads) > 0: dl = downloads[0] - # Return tuple of (Row Columns List, Download Count) - row_data = [ - f" └─ {ntag_display}", - "image", - platform, - status_display, - str(dl), - f"[dim]{digest}[/dim]" - ] - return row_data, dl + return { + "tag": ntag_display, + "type": "image", + "platform": platform, + "status": status_raw, + "downloads": dl, + "digest": digest, + "is_child": True + } def fetch_tag_data(workspace, repo, img, ntag, detailed=False): - """Fetches the manifest list for a tag and returns rows for the table.""" + """Fetches the manifest list for a tag and returns a list of data dicts.""" manifest_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img}/manifests/{ntag}" manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) @@ -196,13 +223,13 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): return [] # Process children - children_rows = [] + children_data = [] total_downloads = 0 for child in children: - row, dl = get_digest_data(workspace, repo, img, child['digest'], ntag, platform=child['platform']) - children_rows.append(row) - total_downloads += dl + data = get_digest_data(workspace, repo, img, child['digest'], ntag, platform=child['platform']) + children_data.append(data) + total_downloads += data['downloads'] # Fetch parent package info api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/?query=version:{ntag}" @@ -210,34 +237,35 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): parent_status = "Unknown" index_digest = "" + slug = "" if pkg_details and len(pkg_details) > 0: parent_status = pkg_details[0].get('status_str', 'Unknown') + slug = pkg_details[0].get('slug', '') ver = pkg_details[0].get('version', '') if ver and not ver.startswith('sha256:'): index_digest = f"sha256:{ver}" else: index_digest = ver - status_display = format_status(parent_status) - - rows = [] - # Parent Row - rows.append([ - f"[bold cyan]{ntag}[/bold cyan]", - "[magenta]manifest/list[/magenta]", - "multi", - status_display, - f"[green]{total_downloads}[/green]", - f"[dim]{index_digest}[/dim]" - ]) - - # Children Rows + results = [] + # Parent Data + 
results.append({ + "tag": ntag, + "type": "manifest/list", + "platform": "multi", + "status": parent_status, + "downloads": total_downloads, + "digest": index_digest, + "is_child": False, + "slug": slug + }) + + # Children Data if detailed: - rows.extend(children_rows) - rows.append("SECTION") + results.extend(children_data) - return rows + return results def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): digest = pkg.get('version') @@ -272,25 +300,25 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): platform_str = " ".join(sorted(list(archs))) - status_display = format_status(status) - - rows = [] - rows.append([ - "(untagged)", - "manifest/list", - platform_str, - status_display, - f"[green]{downloads}[/green]", - digest - ]) + results = [] + results.append({ + "tag": "(untagged)", + "type": "manifest/list", + "platform": platform_str, + "status": status, + "downloads": downloads, + "digest": digest, + "is_child": False, + "slug": slug # Internal use + }) if detailed: for child in child_digests: row, _ = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) - rows.append(row) - rows.append("SECTION") + results.append(row) + results.append("SECTION") - return rows, slug + return results, slug def get_untagged_images(workspace, repo, img, delete=False, detailed=False): # console.print("[bold]Searching for untagged manifest lists...[/bold]") # Removed print @@ -329,58 +357,34 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): # Perform Deletion if requested deleted_slugs = set() + failed_slugs = set() if delete and packages_to_delete: - batch_size = 10 - def delete_pkg_task(slug): - del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" - return slug, make_request(del_url, method='DELETE') - - for i in range(0, len(packages_to_delete), batch_size): - batch = packages_to_delete[i:i + batch_size] - with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: - futures = [executor.submit(delete_pkg_task, slug) for slug in batch] - for future in concurrent.futures.as_completed(futures): - slug, success = future.result() - if success: - deleted_slugs.add(slug) - - if i + batch_size < len(packages_to_delete): - time.sleep(1.1) - - # Build Table - table = Table(title=f"Untagged Manifest Lists: {img}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - if delete: - table.add_column("Action", style="bold red") + deleted_slugs, failed_slugs = batch_delete_packages(workspace, repo, packages_to_delete) + # Build Result Groups + groups = [] for i in range(len(untagged_pkgs)): if i in results_map: rows, slug = results_map[i] + # Update action status action_str = "" if delete: if slug in deleted_slugs: action_str = "Deleted" - else: + elif slug in failed_slugs: action_str = "Failed" - + for row in rows: - if row == "SECTION": - table.add_section() - else: - if delete: - table.add_row(*row, action_str) - else: - table.add_row(*row) + row['action'] = action_str + # Remove internal slug + if 'slug' in row: del row['slug'] + + groups.append(rows) - return table + return groups -def get_image_analysis(workspace, repo, img_name, detailed=False): +def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=None, detailed=False): tags_url = 
f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list" tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) @@ -399,14 +403,7 @@ def get_image_analysis(workspace, repo, img_name, detailed=False): if not tags: return None - table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED) - table.add_column("Tag", style="cyan") - table.add_column("Type", style="magenta") - table.add_column("Platform") - table.add_column("Status") - table.add_column("Downloads", justify="right") - table.add_column("Digest", style="dim") - + groups = [] with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags} @@ -420,19 +417,120 @@ def get_image_analysis(workspace, repo, img_name, detailed=False): for t in tags: if t in results: - rows = results[t] - for row in rows: - if row == "SECTION": - table.add_section() - else: - table.add_row(*row) - return table + groups.append(results[t]) + + # Deletion Logic for Tagged Images + packages_to_delete = [] + for group in groups: + if not group: continue + parent = group[0] + # Only delete manifest lists + if parent.get('type') == 'manifest/list': + should_delete = False + if delete_all: + should_delete = True + elif delete_tag and parent.get('tag') == delete_tag: + should_delete = True + + if should_delete and parent.get('slug'): + packages_to_delete.append(parent['slug']) + + deleted_slugs = set() + failed_slugs = set() + if packages_to_delete: + deleted_slugs, failed_slugs = batch_delete_packages(workspace, repo, packages_to_delete) + + # Update Action Status in Groups + for group in groups: + if not group: continue + parent = group[0] + slug = parent.get('slug') + + action_str = "" + if slug in deleted_slugs: + action_str = "Deleted" + elif slug in failed_slugs: + action_str = "Failed" + + if action_str: + parent['action'] = action_str + # Optionally propagate to children if needed, but usually just parent row + for row in group: + row['action'] = action_str + + return groups def process_image(org, repo, img_name, args): if args.untagged or args.untagged_delete: return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed) else: - return get_image_analysis(org, repo, img_name, detailed=args.detailed) + return get_image_analysis(org, repo, img_name, delete_all=args.delete_all, delete_tag=args.delete_tag, detailed=args.detailed) + +def render_table(image_name, groups, is_untagged=False, has_action=False): + # --- Table Setup --- + table = Table(title=f"{'Untagged' if is_untagged else 'Tagged'} Image Analysis: {image_name}", box=box.ROUNDED) + table.add_column("Tag", style="cyan") + table.add_column("Type", style="magenta") + table.add_column("Platform") + table.add_column("Status") + table.add_column("Downloads", justify="right") + table.add_column("Digest", style="dim") + if has_action: + table.add_column("Action", style="bold red") + + # --- Row Rendering --- + for group in groups: + if not group: continue + parent = group[0] + + # Action string for delete status + action_str = parent.get('action', "") + + # Parent Row + if is_untagged: + table.add_row( + parent.get("tag", ""), + parent.get("type", ""), + parent.get("platform", ""), + format_status(parent.get("status", "")), + f"[green]{parent.get('downloads', 0)}[/green]", + f"[dim]{parent.get('digest', '')}[/dim]", + action_str if has_action else None + ) + else: + row_data = [ 
+ f"[bold cyan]{parent.get('tag', '')}[/bold cyan]", + "[magenta]manifest/list[/magenta]", + "multi", + format_status(parent.get("status", "")), + f"[green]{parent.get('downloads', 0)}[/green]", + f"[dim]{parent.get('digest', '')}[/dim]" + ] + if has_action: + row_data.append(action_str) + + table.add_row(*row_data) + + # Child Rows + if len(group) > 1: + for row in group[1:]: + if row == "SECTION": + table.add_section() + else: + row_data = [ + f" └─ {row.get('tag', '')}", + row.get("type", ""), + row.get("platform", ""), + format_status(row.get("status", "")), + f"[green]{row.get('downloads', 0)}[/green]", + f"[dim]{row.get('digest', '')}[/dim]" + ] + if has_action: + row_data.append(row.get('action', '')) + + table.add_row(*row_data) + + return table def main(): console.print(r"""[bold cyan] @@ -457,7 +555,10 @@ def main(): parser.add_argument("img", nargs="?", help="Image Name (Optional - if omitted, scans all images)") parser.add_argument("--untagged", action="store_true", help="Find untagged manifest lists") parser.add_argument("--untagged-delete", action="store_true", help="Delete untagged manifest lists") + parser.add_argument("--delete-all", action="store_true", help="Delete ALL detected manifest lists") + parser.add_argument("--delete-tag", help="Delete manifest lists matching this specific tag") parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests") + parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)") args = parser.parse_args() @@ -525,13 +626,52 @@ def main(): if not collected_results: console.print("[yellow]No matching images or tags found.[/yellow]") - for _, table in collected_results: - console.print(table) - console.print("") + # --- Output Handling --- + + if args.output == 'table': + for img_name, groups in collected_results: + is_untagged = args.untagged or args.untagged_delete + has_action = args.untagged_delete or args.delete_all or (args.delete_tag is not None) + table = render_table(image_name=img_name, groups=groups, is_untagged=is_untagged, has_action=has_action) + console.print(table) + console.print("") + elif args.output == 'json': + # JSON Output for all images + all_results = {} + for img_name, groups in collected_results: + all_results[img_name] = groups + + json_output = json.dumps(all_results, indent=2) + console.print(json_output) + elif args.output == 'csv': + # CSV Output (simple flat structure) + csv_lines = [] + csv_lines.append(["Image", "Tag", "Type", "Platform", "Status", "Downloads", "Digest", "Action"]) # Header + + for img_name, groups in collected_results: + for group in groups: + if group == "SECTION": + continue + # Flat CSV row + csv_lines.append([ + img_name, + group.get("tag", ""), + group.get("type", ""), + group.get("platform", ""), + group.get("status", ""), + str(group.get("downloads", 0)), + group.get("digest", ""), + group.get("action", "") + ]) + + # Print CSV + for line in csv_lines: + console.print(",".join(f'"{str(item)}"' for item in line)) if __name__ == "__main__": try: main() except KeyboardInterrupt: console.print("\n[bold red]Operation cancelled by user.[/bold red]") - # Use os._exit to avoid \ No newline at end of file + # Use os._exit to avoid hanging on shutdown + os._exit(0) \ No newline at end of file From bedd155937c47ffc4dbd2adaec722be27362d2e1 Mon Sep 17 00:00:00 2001 From: colinmoynes Date: Wed, 17 Dec 2025 11:18:54 +0000 Subject: [PATCH 4/6] fixes --- Docker/Cloudsmith Docker Sleuth/multiarch.py | 115 
++++++++++++++++--- 1 file changed, 96 insertions(+), 19 deletions(-) diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py index e6575cc..fcaf31d 100755 --- a/Docker/Cloudsmith Docker Sleuth/multiarch.py +++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py @@ -3,12 +3,15 @@ import sys import os import json +import csv import argparse import urllib.request import urllib.error from urllib.parse import urlencode import concurrent.futures import time +import logging +from datetime import datetime # Try to import rich try: @@ -31,6 +34,28 @@ API_KEY = os.environ.get("CLOUDSMITH_API_KEY") AUTH_HEADER = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {} +# --- Logging Setup --- +def setup_logging(debug_mode=False): + log_filename = "multiarch_inspector.log" + level = logging.DEBUG if debug_mode else logging.INFO + + # Reset handlers to avoid duplicate logs if called multiple times + root = logging.getLogger() + if root.handlers: + for handler in root.handlers: + root.removeHandler(handler) + + logging.basicConfig( + filename=log_filename, + level=level, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + return logging.getLogger() + +# Initialize with default INFO level, will be re-initialized in main +logger = setup_logging() + # --- Helper Functions --- def make_request(url, headers=None, method='GET', data=None): @@ -56,9 +81,11 @@ def make_request(url, headers=None, method='GET', data=None): if reset: wait = float(reset) - time.time() if wait > 0 and wait < 30: # Only sleep if wait is reasonable + logger.warning(f"Rate limit approaching. Sleeping for {wait:.2f}s") time.sleep(wait + 0.5) if method == 'DELETE': + logger.info(f"DELETE Success: {url}") return True return json.loads(response.read().decode('utf-8')) except urllib.error.HTTPError as e: @@ -76,15 +103,20 @@ def make_request(url, headers=None, method='GET', data=None): wait_time = (2 ** attempt) if wait_time < 0: wait_time = 1 + logger.warning(f"Rate Limited (429). Retrying in {wait_time:.2f}s. URL: {url}") time.sleep(wait_time + 0.5) continue elif e.code == 404: + logger.debug(f"404 Not Found: {url}") return None else: + logger.error(f"HTTP Error {e.code}: {url}") return None except Exception as e: + logger.error(f"Request Error: {e} - URL: {url}") return None + logger.error(f"Max retries exceeded for: {url}") return None def find_key_recursive(obj, key): @@ -116,6 +148,7 @@ def batch_delete_packages(workspace, repo, slugs): if not slugs: return deleted, failed + logger.info(f"Starting batch deletion for {len(slugs)} packages.") batch_size = 10 def delete_pkg_task(slug): del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" @@ -129,8 +162,10 @@ def delete_pkg_task(slug): slug, success = future.result() if success: deleted.add(slug) + logger.info(f"Deleted package slug: {slug}") else: failed.add(slug) + logger.error(f"Failed to delete package slug: {slug}") if i + batch_size < len(slugs): time.sleep(1.1) @@ -145,7 +180,7 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow # 1. 
Fetch Manifest to get Architecture (Only if unknown) if platform == "unknown": manifest_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img}/manifests/{digest}" - manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v2+json", "Cache-Control": "no-cache"}) if manifest_json: if 'manifests' in manifest_json: @@ -314,14 +349,14 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): if detailed: for child in child_digests: - row, _ = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) + # FIX: get_digest_data returns a dict, not a tuple + row = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) results.append(row) results.append("SECTION") return results, slug def get_untagged_images(workspace, repo, img, delete=False, detailed=False): - # console.print("[bold]Searching for untagged manifest lists...[/bold]") # Removed print api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/" query = urlencode({'query': f"name:{img}"}) full_url = f"{api_url}?{query}" @@ -337,8 +372,9 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): untagged_pkgs.append(p) if not untagged_pkgs: - # console.print("[yellow]No untagged manifest lists found.[/yellow]") # Removed print return None + + logger.info(f"Found {len(untagged_pkgs)} untagged manifest lists for image: {img}") # Fetch data first results_map = {} @@ -376,9 +412,10 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): action_str = "Failed" for row in rows: - row['action'] = action_str - # Remove internal slug - if 'slug' in row: del row['slug'] + if isinstance(row, dict): + row['action'] = action_str + # Remove internal slug + if 'slug' in row: del row['slug'] groups.append(rows) @@ -401,6 +438,7 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N tags = sorted(list(set(flat_tags))) if not tags: + logger.info(f"No tags found for image: {img_name}") return None groups = [] @@ -435,6 +473,9 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N if should_delete and parent.get('slug'): packages_to_delete.append(parent['slug']) + if packages_to_delete: + logger.info(f"Identified {len(packages_to_delete)} tagged packages to delete for image: {img_name}") + deleted_slugs = set() failed_slugs = set() if packages_to_delete: @@ -456,7 +497,8 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N parent['action'] = action_str # Optionally propagate to children if needed, but usually just parent row for row in group: - row['action'] = action_str + if isinstance(row, dict): + row['action'] = action_str return groups @@ -533,6 +575,26 @@ def render_table(image_name, groups, is_untagged=False, has_action=False): return table def main(): + # Parse args first to configure logging + parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector") + parser.add_argument("org", help="Cloudsmith Organization/User") + parser.add_argument("repo", help="Cloudsmith Repository") + parser.add_argument("img", nargs="?", help="Image Name (Optional - if omitted, scans all images)") + parser.add_argument("--untagged", action="store_true", help="Find untagged manifest lists") + parser.add_argument("--untagged-delete", action="store_true", 
help="Delete untagged manifest lists") + parser.add_argument("--delete-all", action="store_true", help="Delete ALL detected manifest lists") + parser.add_argument("--delete-tag", help="Delete manifest lists matching this specific tag") + parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests") + parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)") + parser.add_argument("--debug-log", action="store_true", help="Enable debug logging to file") + + args = parser.parse_args() + + # Re-configure logging based on args + global logger + logger = setup_logging(args.debug_log) + + logger.info("--- Script Started ---") console.print(r"""[bold cyan] ██████╗██╗ ██████╗ ██╗ ██╗██████╗ ███████╗███╗ ███╗██╗████████╗██╗ ██╗ ██╔════╝██║ ██╔═══██╗██║ ██║██╔══██╗██╔════╝████╗ ████║██║╚══██╔══╝██║ ██║ @@ -559,24 +621,34 @@ def main(): parser.add_argument("--delete-tag", help="Delete manifest lists matching this specific tag") parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests") parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)") + parser.add_argument("--debug-log", action="store_true", help="Enable debug logging to file") args = parser.parse_args() + logger.info(f"Arguments: {args}") images_to_scan = [] if args.img: images_to_scan.append(args.img) else: - console.print(f"[bold]Fetching catalog for {args.org}/{args.repo}...[/bold]") + if args.output == 'table': + console.print(f"[bold]Fetching catalog for {args.org}/{args.repo}...[/bold]") + + logger.info(f"Fetching catalog for {args.org}/{args.repo}") catalog_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/_catalog" catalog_json = make_request(catalog_url, {"Accept": "application/json", "Cache-Control": "no-cache"}) if catalog_json and 'repositories' in catalog_json: images_to_scan = catalog_json['repositories'] + logger.info(f"Found {len(images_to_scan)} images in catalog.") else: - console.print("[red]Failed to fetch catalog or no images found.[/red]") + msg = "Failed to fetch catalog or no images found." 
+            if args.output == 'table':
+                console.print(f"[red]{msg}[/red]")
+            logger.error(msg)
             sys.exit(1)
 
+    # Only show progress bar for table output
     with Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
@@ -601,16 +673,16 @@ def main():
             for future in concurrent.futures.as_completed(future_to_img):
                 img_name = future_to_img[future]
                 try:
-                    table = future.result()
-                    if table:
-                        collected_results.append((img_name, table))
-                    else:
-                        # Optional: log empty/no tags
-                        pass
+                    groups = future.result()
+                    if groups:
+                        collected_results.append((img_name, groups))
                 except Exception as e:
-                    progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
+                    logger.error(f"Error processing {img_name}: {e}")
+                    if args.output == 'table':
+                        progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
 
-                progress.advance(task)
+                if args.output == 'table':
+                    progress.advance(task)
 
             # Normal shutdown
             executor.shutdown(wait=True)
@@ -624,7 +696,12 @@ def main():
     collected_results.sort(key=lambda x: x[0])
 
     if not collected_results:
-        console.print("[yellow]No matching images or tags found.[/yellow]")
+        if args.output == 'table':
+            console.print("[yellow]No matching images or tags found.[/yellow]")
+        elif args.output == 'json':
+            print("[]")
+        logger.info("No matching images or tags found.")
+        return
 
     # --- Output Handling ---
 
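Note on the retry logic threaded through PATCH 1 and PATCH 4: stripped of the
script's logging and DELETE handling, the 429 handling reduces to the sketch
below. It is illustrative only — get_with_backoff is a made-up name, the bare
urlopen call stands in for the script's make_request, and only the header
names (Retry-After, X-RateLimit-Reset) are taken from the diffs above.

    import time
    import urllib.error
    import urllib.request

    def get_with_backoff(url, max_retries=5):
        """Fetch a URL, waiting out HTTP 429 responses before retrying."""
        for attempt in range(max_retries):
            try:
                with urllib.request.urlopen(url) as response:
                    return response.read()
            except urllib.error.HTTPError as e:
                if e.code != 429:
                    raise
                retry_after = e.headers.get('Retry-After')
                if retry_after:
                    wait = float(retry_after)
                else:
                    # Fall back to the reset timestamp, then exponential backoff
                    reset = e.headers.get('X-RateLimit-Reset')
                    wait = (float(reset) - time.time()) if reset else 2 ** attempt
                time.sleep(max(wait, 1) + 0.5)
        return None
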
From e1c406055b3b2ac8b115362e33c16ff12f8fed74 Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Wed, 17 Dec 2025 11:45:44 +0000
Subject: [PATCH 5/6] improved fetching of tags. now entirely Cloudsmith API
 driven

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 66 ++++++++++++++------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index fcaf31d..0be3883 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -58,7 +58,7 @@ def setup_logging(debug_mode=False):
 
 # --- Helper Functions ---
 
-def make_request(url, headers=None, method='GET', data=None):
+def make_request(url, headers=None, method='GET', data=None, return_headers=False):
     """Performs an HTTP request and returns parsed JSON. Handles rate limiting."""
     if headers is None:
         headers = {}
@@ -87,7 +87,12 @@ def make_request(url, headers=None, method='GET', data=None, return_headers=False):
                 if method == 'DELETE':
                     logger.info(f"DELETE Success: {url}")
                     return True
-                return json.loads(response.read().decode('utf-8'))
+
+                resp_data = json.loads(response.read().decode('utf-8'))
+                if return_headers:
+                    return resp_data, response.headers
+                return resp_data
+
         except urllib.error.HTTPError as e:
             if e.code == 429:
                 # Rate limited - wait and retry
@@ -422,28 +427,53 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False):
     return groups
 
 def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=None, detailed=False):
-    tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list"
-    tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"})
+    # Switch to Cloudsmith API to avoid upstream tags and allow filtering
+    api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/"
 
-    tags = []
-    if tags_json:
-        raw_tags = find_key_recursive(tags_json, 'tags')
-        flat_tags = []
-        for item in raw_tags:
-            if isinstance(item, list):
-                flat_tags.extend(item)
-            else:
-                flat_tags.append(item)
+    # Construct query: format:docker AND name:{img_name} (if provided)
+    query_parts = ["format:docker"]
+    if img_name:
+        query_parts.append(f"name:{img_name}")
+
+    query = urlencode({'query': " AND ".join(query_parts)})
+    next_url = f"{api_url}?{query}"
+
+    tags = set()
+
+    # Pagination Loop
+    while next_url:
+        result = make_request(next_url, {"Cache-Control": "no-cache"}, return_headers=True)
+        if not result:
+            break
+
+        data, headers = result
 
-        tags = sorted(list(set(flat_tags)))
-
-    if not tags:
+        for pkg in data:
+            # pkg['tags'] is a dict like {'version': [...]}
+            version_tags = pkg.get('tags', {}).get('version', [])
+            for t in version_tags:
+                tags.add(t)
+
+        # Handle Pagination via Link header
+        next_url = None
+        link_header = headers.get('Link')
+        if link_header:
+            links = link_header.split(',')
+            for link in links:
+                if 'rel="next"' in link:
+                    # Format: <url>; rel="next"
+                    next_url = link.split(';')[0].strip('<> ')
+                    break
+
+    sorted_tags = sorted(list(tags))
+
+    if not sorted_tags:
         logger.info(f"No tags found for image: {img_name}")
         return None
 
     groups = []
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-        future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags}
+        future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in sorted_tags}
 
         results = {}
         for future in concurrent.futures.as_completed(future_to_tag):
@@ -453,7 +483,7 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N
             except Exception:
                 pass
 
-    for t in tags:
+    for t in sorted_tags:
         if t in results:
             groups.append(results[t])
 
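Note on the pagination loop added above: Cloudsmith's package API signals the
next page through a standard Link header ('<url>; rel="next"'). The header
parsing, pulled out into a self-contained sketch — next_page_url is a made-up
name and the header value is invented for the example:

    def next_page_url(link_header):
        """Return the rel="next" target from an HTTP Link header, if any."""
        if not link_header:
            return None
        for link in link_header.split(','):
            if 'rel="next"' in link:
                # Each entry looks like: <url>; rel="next"
                return link.split(';')[0].strip('<> ')
        return None

    header = ('<https://api.cloudsmith.io/v1/packages/acme/repo/?page=2>; rel="next", '
              '<https://api.cloudsmith.io/v1/packages/acme/repo/?page=5>; rel="last"')
    assert next_page_url(header) == 'https://api.cloudsmith.io/v1/packages/acme/repo/?page=2'
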
From 38e18fa2353df013139e96c5f38eaee9f3208681 Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Wed, 17 Dec 2025 13:06:09 +0000
Subject: [PATCH 6/6] progress bar improvements

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 70 +++++++++++++++-----
 1 file changed, 52 insertions(+), 18 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index 0be3883..015e573 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -361,7 +361,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False):
 
     return results, slug
 
-def get_untagged_images(workspace, repo, img, delete=False, detailed=False):
+def get_untagged_images(workspace, repo, img, delete=False, detailed=False, progress=None):
     api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/"
     query = urlencode({'query': f"name:{img}"})
     full_url = f"{api_url}?{query}"
@@ -385,6 +385,10 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False, progress=None):
     results_map = {}
     packages_to_delete = []
 
+    task_id = None
+    if progress:
+        task_id = progress.add_task(f"[cyan]Analyzing {img}[/cyan] ({len(untagged_pkgs)} untagged)", total=len(untagged_pkgs))
+
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
         futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)}
         for future in concurrent.futures.as_completed(futures):
@@ -395,6 +399,12 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False, progress=None):
                 packages_to_delete.append(slug)
             except Exception:
                 pass
+
+            if progress and task_id is not None:
+                progress.advance(task_id)
+
+    if progress and task_id is not None:
+        progress.remove_task(task_id)
 
     # Perform Deletion if requested
     deleted_slugs = set()
@@ -426,7 +436,7 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False, progress=None):
 
     return groups
 
-def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=None, detailed=False):
+def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=None, detailed=False, progress=None):
     # Switch to Cloudsmith API to avoid upstream tags and allow filtering
     api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/"
 
@@ -472,6 +482,11 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N
         return None
 
     groups = []
+
+    task_id = None
+    if progress:
+        task_id = progress.add_task(f"[cyan]Analyzing {img_name}[/cyan] ({len(sorted_tags)} tags)", total=len(sorted_tags))
+
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
         future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in sorted_tags}
 
@@ -482,10 +497,16 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N
                 results[tag] = future.result()
             except Exception:
                 pass
+
+            if progress and task_id is not None:
+                progress.advance(task_id)
 
     for t in sorted_tags:
         if t in results:
             groups.append(results[t])
+
+    if progress and task_id is not None:
+        progress.remove_task(task_id)
 
     # Deletion Logic for Tagged Images
     packages_to_delete = []
@@ -532,11 +553,11 @@ def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=N
 
     return groups
 
-def process_image(org, repo, img_name, args):
+def process_image(org, repo, img_name, args, progress=None):
     if args.untagged or args.untagged_delete:
-        return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed)
+        return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed, progress=progress)
     else:
-        return get_image_analysis(org, repo, img_name, delete_all=args.delete_all, delete_tag=args.delete_tag, detailed=args.detailed)
+        return get_image_analysis(org, repo, img_name, delete_all=args.delete_all, delete_tag=args.delete_tag, detailed=args.detailed, progress=progress)
 
 def render_table(image_name, groups, is_untagged=False, has_action=False):
     # --- Table Setup ---
@@ -679,24 +700,37 @@ def main():
         sys.exit(1)
 
     # Only show progress bar for table output
-    with Progress(
-        SpinnerColumn(),
-        TextColumn("[progress.description]{task.description}"),
-        BarColumn(),
-        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
-        console=console
-    ) as progress:
-        task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan))
+    if args.output == 'table':
+        progress_ctx = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            console=console
+        )
+    else:
+        # Dummy context manager for non-table output
+        class DummyProgress:
+            def __enter__(self): return self
+            def __exit__(self, *args): pass
+            def add_task(self, *args, **kwargs): return None
+            def advance(self, *args, **kwargs): pass
+            def remove_task(self, *args, **kwargs): pass
+            @property
+            def console(self): return console # fallback
+        progress_ctx = DummyProgress()
+
+    collected_results = []
+
+    with progress_ctx as progress:
+        if args.output == 'table':
+            task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan))
 
-        collected_results = []
-
-        # Use a reasonable number of workers for images (e.g., 5)
-        # Each image might spawn its own threads for tags/digests
-        # Manually manage executor to handle KeyboardInterrupt gracefully
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
         try:
             future_to_img = {
-                executor.submit(process_image, args.org, args.repo, img, args): img
+                executor.submit(process_image, args.org, args.repo, img, args, progress=progress): img
                 for img in images_to_scan
             }