From c0b694accf9dce2730631d21c381357ec2d94c97 Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Mon, 15 Dec 2025 11:06:53 +0000
Subject: [PATCH 1/2] Implement rate-limit handling and improve multi-threaded
 performance

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 293 ++++++++++++-------
 1 file changed, 186 insertions(+), 107 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index cabecd3..767d8e2 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -8,6 +8,7 @@
 import urllib.error
 from urllib.parse import urlencode
 import concurrent.futures
+import time
 
 # Try to import rich
 try:
@@ -15,6 +16,7 @@
     from rich.table import Table
     from rich import box
     from rich.text import Text
+    from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn
 except ImportError:
     print("Error: This script requires the 'rich' library.")
     print("Please install it using: pip install rich")
@@ -32,7 +34,7 @@
 # --- Helper Functions ---
 
 def make_request(url, headers=None, method='GET', data=None):
-    """Performs an HTTP request and returns parsed JSON."""
+    """Performs an HTTP request and returns parsed JSON. Handles rate limiting."""
     if headers is None:
         headers = {}
 
@@ -42,14 +44,48 @@ def make_request(url, headers=None, method='GET', data=None):
     if data:
         req.data = data.encode('utf-8')
 
-    try:
-        with urllib.request.urlopen(req) as response:
-            return json.loads(response.read().decode('utf-8'))
-    except urllib.error.HTTPError as e:
-        return None
-    except Exception as e:
-        # Avoid printing to stderr in threads to prevent garbled output
-        return None
+    max_retries = 5
+    for attempt in range(max_retries):
+        try:
+            with urllib.request.urlopen(req) as response:
+                # Proactive Rate Limit Handling via Headers
+                # https://docs.cloudsmith.com/api/rate-limits#monitoring-your-usage
+                remaining = response.headers.get('X-RateLimit-Remaining')
+                if remaining is not None and int(remaining) < 3:
+                    reset = response.headers.get('X-RateLimit-Reset')
+                    if reset:
+                        wait = float(reset) - time.time()
+                        if wait > 0 and wait < 30:  # Only sleep if wait is reasonable
+                            time.sleep(wait + 0.5)
+
+                if method == 'DELETE':
+                    return True
+                return json.loads(response.read().decode('utf-8'))
+        except urllib.error.HTTPError as e:
+            if e.code == 429:
+                # Rate limited - wait and retry
+                retry_after = e.headers.get('Retry-After')
+                if retry_after:
+                    wait_time = float(retry_after)
+                else:
+                    # Fallback to X-RateLimit-Reset
+                    reset = e.headers.get('X-RateLimit-Reset')
+                    if reset:
+                        wait_time = float(reset) - time.time()
+                    else:
+                        wait_time = (2 ** attempt)
+
+                if wait_time < 0: wait_time = 1
+                time.sleep(wait_time + 0.5)
+                continue
+            elif e.code == 404:
+                return None
+            else:
+                return None
+        except Exception:
+            return None
+
+    return None
 
 def find_key_recursive(obj, key):
     """Recursively searches for a key in a dictionary/list and returns a list of values."""
@@ -257,7 +293,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False):
     return rows, slug
 
 def get_untagged_images(workspace, repo, img, delete=False, detailed=False):
-    console.print("[bold]Searching for untagged manifest lists...[/bold]")
+    # console.print("[bold]Searching for untagged manifest lists...[/bold]")  # Removed print
     api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/"
     query = urlencode({'query': f"name:{img}"})
     full_url = f"{api_url}?{query}"
@@ -273,57 +309,130 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False):
             untagged_pkgs.append(p)
 
     if not untagged_pkgs:
-        console.print("[yellow]No untagged manifest lists found.[/yellow]")
-        return
+        # console.print("[yellow]No untagged manifest lists found.[/yellow]")  # Removed print
+        return None
+
+    # Fetch data first
+    results_map = {}
+    packages_to_delete = []
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)}
+        for future in concurrent.futures.as_completed(futures):
+            index = futures[future]
+            try:
+                rows, slug = future.result()
+                results_map[index] = (rows, slug)
+                packages_to_delete.append(slug)
+            except Exception:
+                pass
+
+    # Perform Deletion if requested
+    deleted_slugs = set()
+    if delete and packages_to_delete:
+        batch_size = 10
+        def delete_pkg_task(slug):
+            del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/"
+            return slug, make_request(del_url, headers=AUTH_HEADER, method='DELETE')
+
+        for i in range(0, len(packages_to_delete), batch_size):
+            batch = packages_to_delete[i:i + batch_size]
+            with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
+                futures = [executor.submit(delete_pkg_task, slug) for slug in batch]
+                for future in concurrent.futures.as_completed(futures):
+                    slug, success = future.result()
+                    if success:
+                        deleted_slugs.add(slug)
+
+            if i + batch_size < len(packages_to_delete):
+                time.sleep(1.1)
 
-    # Create Table
-    table = Table(title="Untagged Manifest Lists", box=box.ROUNDED)
+    # Build Table
+    table = Table(title=f"Untagged Manifest Lists: {img}", box=box.ROUNDED)
     table.add_column("Tag", style="cyan")
     table.add_column("Type", style="magenta")
     table.add_column("Platform")
     table.add_column("Status")
     table.add_column("Downloads", justify="right")
     table.add_column("Digest", style="dim")
+    if delete:
+        table.add_column("Action", style="bold red")
 
-    packages_to_delete = []
-
-    with console.status("[bold green]Fetching untagged data...[/bold green]"):
-        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-            # Submit all tasks
-            futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)}
+    for i in range(len(untagged_pkgs)):
+        if i in results_map:
+            rows, slug = results_map[i]
 
-            results = {}
-            for future in concurrent.futures.as_completed(futures):
-                index = futures[future]
-                try:
-                    results[index] = future.result()
-                except Exception as e:
-                    console.print(f"[red]Error processing untagged image: {e}[/red]")
+            action_str = ""
+            if delete:
+                if slug in deleted_slugs:
+                    action_str = "Deleted"
+                else:
+                    action_str = "Failed"
+
+            for row in rows:
+                if row == "SECTION":
+                    table.add_section()
+                else:
+                    if delete:
+                        table.add_row(*row, action_str)
+                    else:
+                        table.add_row(*row)
+
+    return table
+
+def get_image_analysis(workspace, repo, img_name, detailed=False):
+    tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list"
+    tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"})
+
+    tags = []
+    if tags_json:
+        raw_tags = find_key_recursive(tags_json, 'tags')
+        flat_tags = []
+        for item in raw_tags:
+            if isinstance(item, list):
+                flat_tags.extend(item)
+            else:
+                flat_tags.append(item)
+
+        tags = sorted(list(set(flat_tags)))
 
-            # Add to table in original order
-            for i in range(len(untagged_pkgs)):
-                if i in results:
-                    rows, slug = results[i]
-                    packages_to_delete.append(slug)
-
-                    for row in rows:
-                        if row == "SECTION":
-                            table.add_section()
-                        else:
-                            table.add_row(*row)
+    if not tags:
+        return None
 
-    console.print(table)
+    table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED)
+    table.add_column("Tag", style="cyan")
+    table.add_column("Type", style="magenta")
+    table.add_column("Platform")
+    table.add_column("Status")
+    table.add_column("Downloads", justify="right")
+    table.add_column("Digest", style="dim")
 
-    if delete:
-        console.print("\n[bold red]Deleting untagged packages...[/bold red]")
-        for slug in packages_to_delete:
-            console.print(f"  Deleting package: {slug}...", end=" ")
-            del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/"
-            req = urllib.request.Request(del_url, headers=AUTH_HEADER, method='DELETE')
+    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+        future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags}
+
+        results = {}
+        for future in concurrent.futures.as_completed(future_to_tag):
+            tag = future_to_tag[future]
             try:
-                with urllib.request.urlopen(req):
-                    console.print("[green]Deleted.[/green]")
-            except Exception as e:
-                console.print(f"[red]Failed: {e}[/red]")
+                results[tag] = future.result()
+            except Exception:
+                pass
+
+    for t in tags:
+        if t in results:
+            rows = results[t]
+            for row in rows:
+                if row == "SECTION":
+                    table.add_section()
+                else:
+                    table.add_row(*row)
+    return table
+
+def process_image(org, repo, img_name, args):
+    if args.untagged or args.untagged_delete:
+        return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed)
+    else:
+        return get_image_analysis(org, repo, img_name, detailed=args.detailed)
 
 def main():
     parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector")
@@ -351,68 +460,38 @@ def main():
         console.print("[red]Failed to fetch catalog or no images found.[/red]")
         sys.exit(1)
 
-    for img_name in images_to_scan:
-        console.print(f"\nDocker Image: [bold blue]{args.org}/{args.repo}/{img_name}[/bold blue]")
-
-        if args.untagged or args.untagged_delete:
-            get_untagged_images(args.org, args.repo, img_name, delete=args.untagged_delete, detailed=args.detailed)
-        else:
-            # Get Tags
-            tags_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/{img_name}/tags/list"
-            tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"})
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        console=console
+    ) as progress:
+        task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan))
+
+        # Use a reasonable number of workers for images (e.g., 5)
+        # Each image might spawn its own threads for tags/digests
+        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+            future_to_img = {
+                executor.submit(process_image, args.org, args.repo, img, args): img
+                for img in images_to_scan
+            }
 
-            tags = []
-            if tags_json:
-                raw_tags = find_key_recursive(tags_json, 'tags')
-                flat_tags = []
-                for item in raw_tags:
-                    if isinstance(item, list):
-                        flat_tags.extend(item)
+            for future in concurrent.futures.as_completed(future_to_img):
+                img_name = future_to_img[future]
+                try:
+                    table = future.result()
+                    if table:
+                        # Print the table to the console (thread-safe via rich)
+                        progress.console.print(table)
+                        progress.console.print("")  # Newline
                     else:
-                        flat_tags.append(item)
+                        # Optional: log empty/no tags
+                        pass
+                except Exception as e:
+                    progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
 
-            tags = sorted(list(set(flat_tags)))
-
-            if not tags:
-                console.print(f"[yellow]No tags found for {img_name}.[/yellow]")
-                continue
-
-            console.print(f"Found matching tags: [bold]{len(tags)}[/bold]")
-
-            # Create Main Table
-            table = Table(title=f"Image Analysis: {img_name}", box=box.ROUNDED)
-            table.add_column("Tag", style="cyan")
-            table.add_column("Type", style="magenta")
-            table.add_column("Platform")
-            table.add_column("Status")
-            table.add_column("Downloads", justify="right")
-            table.add_column("Digest", style="dim")
-
-            with console.status(f"[bold green]Fetching data for {img_name}...[/bold green]"):
-                with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-                    # Submit all tasks
-                    future_to_tag = {executor.submit(fetch_tag_data, args.org, args.repo, img_name, t, args.detailed): t for t in tags}
-
-                    results = {}
-                    for future in concurrent.futures.as_completed(future_to_tag):
-                        tag = future_to_tag[future]
-                        try:
-                            results[tag] = future.result()
-                        except Exception as exc:
-                            console.print(f"[red]Tag {tag} generated an exception: {exc}[/red]")
-
-                    # Add to table in sorted order
-                    for t in tags:
-                        if t in results:
-                            rows = results[t]
-                            for row in rows:
-                                if row == "SECTION":
-                                    table.add_section()
-                                else:
-                                    table.add_row(*row)
-
-            # Print the final table
-            console.print(table)
+                progress.advance(task)
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
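
Note on the retry policy that patch 1 adds to make_request(): on a 429 response the wait time is taken from the Retry-After header when present, falling back to the X-RateLimit-Reset epoch timestamp, and finally to exponential backoff on the attempt number, always with a 0.5s safety margin. The sketch below mirrors that decision order in isolation so it can be eyeballed or unit-tested; compute_backoff() and the plain-dict headers are illustrative assumptions, not code from the patch.

import time

def compute_backoff(headers, attempt):
    # Mirrors the 429 branch of make_request(): Retry-After first, then
    # X-RateLimit-Reset (an absolute epoch time), then 2 ** attempt.
    retry_after = headers.get('Retry-After')
    if retry_after:
        wait_time = float(retry_after)
    else:
        reset = headers.get('X-RateLimit-Reset')
        if reset:
            wait_time = float(reset) - time.time()
        else:
            wait_time = 2 ** attempt
    if wait_time < 0:
        wait_time = 1
    return wait_time + 0.5  # same 0.5s safety margin as the patch

# Quick checks against stubbed header dicts:
print(compute_backoff({'Retry-After': '3'}, attempt=0))                   # 3.5
print(compute_backoff({'X-RateLimit-Reset': str(time.time() + 10)}, 0))   # ~10.5
print(compute_backoff({}, attempt=2))                                     # 4.5
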
From da132e521a3be3b385b637e7d2bdb4055049c25a Mon Sep 17 00:00:00 2001
From: colinmoynes
Date: Mon, 15 Dec 2025 13:31:46 +0000
Subject: [PATCH 2/2] Improve error handling and make minor changes

---
 Docker/Cloudsmith Docker Sleuth/multiarch.py | 56 +++++++++++++++++---
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index 767d8e2..03a227c 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -228,7 +228,7 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False):
             "[magenta]manifest/list[/magenta]",
             "multi",
             status_display,
-            str(total_downloads),
+            f"[green]{total_downloads}[/green]",
             f"[dim]{index_digest}[/dim]"
         ])
 
@@ -280,7 +280,7 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False):
             "manifest/list",
             platform_str,
             status_display,
-            str(downloads),
+            f"[green]{downloads}[/green]",
             digest
         ])
 
@@ -435,6 +435,22 @@ def process_image(org, repo, img_name, args):
         return get_image_analysis(org, repo, img_name, detailed=args.detailed)
 
 def main():
+    console.print(r"""[bold cyan]
+██████╗██╗ ██████╗ ██╗ ██╗██████╗ ███████╗███╗ ███╗██╗████████╗██╗ ██╗
+██╔════╝██║ ██╔═══██╗██║ ██║██╔══██╗██╔════╝████╗ ████║██║╚══██╔══╝██║ ██║
+██║ ██║ ██║ ██║██║ ██║██║ ██║███████╗██╔████╔██║██║ ██║ ███████║
+██║ ██║ ██║ ██║██║ ██║██║ ██║╚════██║██║╚██╔╝██║██║ ██║ ██╔══██║
+╚██████╗███████╗╚██████╔╝╚██████╔╝██████╔╝███████║██║ ╚═╝ ██║██║ ██║ ██║ ██║
+ ╚═════╝╚══════╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝
+
+██████╗ ██████╗ ██████╗██╗ ██╗███████╗██████╗ ███████╗██╗ ███████╗██╗ ██╗████████╗██╗ ██╗
+██╔══██╗██╔═══██╗██╔════╝██║ ██╔╝██╔════╝██╔══██╗ ██╔════╝██║ ██╔════╝██║ ██║╚══██╔══╝██║ ██║
+██║ ██║██║ ██║██║ █████╔╝ █████╗ ██████╔╝ ███████╗██║ █████╗ ██║ ██║ ██║ ███████║
+██║ ██║██║ ██║██║ ██╔═██╗ ██╔══╝ ██╔══██╗ ╚════██║██║ ██╔══╝ ██║ ██║ ██║ ██╔══██║
+██████╔╝╚██████╔╝╚██████╗██║ ██╗███████╗██║ ██║ ███████║███████╗███████╗╚██████╔╝ ██║ ██║ ██║ ██║
+╚═════╝ ╚═════╝ ╚═════╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝ ╚══════╝╚══════╝╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝
+[/bold cyan]""")
+
     parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector")
     parser.add_argument("org", help="Cloudsmith Organization/User")
     parser.add_argument("repo", help="Cloudsmith Repository")
@@ -467,11 +483,15 @@ def main():
         TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
         console=console
     ) as progress:
-        task = progress.add_task(f"Scanning {len(images_to_scan)} images...", total=len(images_to_scan))
+        task = progress.add_task(f"Processing {len(images_to_scan)} images...", total=len(images_to_scan))
+
+        collected_results = []
 
         # Use a reasonable number of workers for images (e.g., 5)
        # Each image might spawn its own threads for tags/digests
-        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+        # Manually manage executor to handle KeyboardInterrupt gracefully
+        executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
+        try:
             future_to_img = {
                 executor.submit(process_image, args.org, args.repo, img, args): img
                 for img in images_to_scan
             }
 
             for future in concurrent.futures.as_completed(future_to_img):
                 img_name = future_to_img[future]
                 try:
                     table = future.result()
                     if table:
-                        # Print the table to the console (thread-safe via rich)
-                        progress.console.print(table)
-                        progress.console.print("")  # Newline
+                        collected_results.append((img_name, table))
                     else:
                         # Optional: log empty/no tags
                         pass
                 except Exception as e:
                     progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
 
                 progress.advance(task)
+
+            # Normal shutdown
+            executor.shutdown(wait=True)
+
+        except KeyboardInterrupt:
+            # Force shutdown without waiting
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+
+    # Sort results by image name and print
+    collected_results.sort(key=lambda x: x[0])
+
+    if not collected_results:
+        console.print("[yellow]No matching images or tags found.[/yellow]")
+
+    for _, table in collected_results:
+        console.print(table)
+        console.print("")
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    try:
+        main()
+    except KeyboardInterrupt:
+        console.print("\n[bold red]Operation cancelled by user.[/bold red]")
+        # Use os._exit to avoid waiting on lingering non-daemon worker threads
+        import os
+        os._exit(130)
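
Note on the manually managed executor in patch 2: a with ThreadPoolExecutor(...) block implicitly calls shutdown(wait=True) on exit, so Ctrl-C during a scan would still block until every queued image had been processed. Managing the executor by hand lets the KeyboardInterrupt path call shutdown(wait=False, cancel_futures=True) (available since Python 3.9) and drop queued work immediately, re-raising so the top-level handler can exit. A minimal standalone sketch of the same pattern, with slow_task() as an illustrative stand-in for process_image():

import concurrent.futures
import time

def slow_task(n):
    # Stand-in for process_image(); just burns a little time
    time.sleep(0.2)
    return n * n

executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
try:
    futures = [executor.submit(slow_task, n) for n in range(50)]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())
    executor.shutdown(wait=True)   # normal path: let in-flight work finish
except KeyboardInterrupt:
    # Interrupt path: cancel queued tasks instead of draining them
    executor.shutdown(wait=False, cancel_futures=True)
    raise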