diff --git a/Docker/Cloudsmith Docker Sleuth/multiarch.py b/Docker/Cloudsmith Docker Sleuth/multiarch.py
index 5b9318b..2484907 100755
--- a/Docker/Cloudsmith Docker Sleuth/multiarch.py
+++ b/Docker/Cloudsmith Docker Sleuth/multiarch.py
@@ -10,6 +10,8 @@ from urllib.parse import urlencode
 import concurrent.futures
 import time
+import logging
+from datetime import datetime
 
 # Try to import rich
 try:
@@ -32,9 +34,31 @@ API_KEY = os.environ.get("CLOUDSMITH_API_KEY")
 AUTH_HEADER = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}
 
+# --- Logging Setup ---
+def setup_logging(debug_mode=False):
+    log_filename = "multiarch_inspector.log"
+    level = logging.DEBUG if debug_mode else logging.INFO
+
+    # Reset handlers to avoid duplicate logs if called multiple times;
+    # iterate over a copy because removeHandler mutates the list
+    root = logging.getLogger()
+    for handler in list(root.handlers):
+        root.removeHandler(handler)
+
+    logging.basicConfig(
+        filename=log_filename,
+        level=level,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+    return logging.getLogger()
+
+# Initialize with default INFO level, will be re-initialized in main
+logger = setup_logging()
+
 # --- Helper Functions ---
-def make_request(url, headers=None, method='GET', data=None):
+def make_request(url, headers=None, method='GET', data=None, return_headers=False):
     """Performs an HTTP request and returns parsed JSON. Handles rate limiting."""
     if headers is None:
         headers = {}
@@ -57,11 +81,18 @@ def make_request(url, headers=None, method='GET', data=None):
             if reset:
                 wait = float(reset) - time.time()
                 if wait > 0 and wait < 30:  # Only sleep if wait is reasonable
+                    logger.warning(f"Rate limit approaching. Sleeping for {wait:.2f}s")
                     time.sleep(wait + 0.5)
 
             if method == 'DELETE':
+                logger.info(f"DELETE Success: {url}")
                 return True
-            return json.loads(response.read().decode('utf-8'))
+
+            resp_data = json.loads(response.read().decode('utf-8'))
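+            # Callers that paginate need the response headers (Link) as well as the body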
URL: {url}") time.sleep(wait_time + 0.5) continue elif e.code == 404: + logger.debug(f"404 Not Found: {url}") return None else: + logger.error(f"HTTP Error {e.code}: {url}") return None except Exception as e: + logger.error(f"Request Error: {e} - URL: {url}") return None + logger.error(f"Max retries exceeded for: {url}") return None def find_key_recursive(obj, key): @@ -110,6 +146,37 @@ def format_status(status_str): if status_str == "Failed": return f"[bold red]{status_str}[/bold red] ❌" return status_str +def batch_delete_packages(workspace, repo, slugs): + """Deletes a list of package slugs in batches to respect rate limits.""" + deleted = set() + failed = set() + if not slugs: + return deleted, failed + + logger.info(f"Starting batch deletion for {len(slugs)} packages.") + batch_size = 10 + def delete_pkg_task(slug): + del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" + return slug, make_request(del_url, method='DELETE') + + for i in range(0, len(slugs), batch_size): + batch = slugs[i:i + batch_size] + with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: + futures = [executor.submit(delete_pkg_task, slug) for slug in batch] + for future in concurrent.futures.as_completed(futures): + slug, success = future.result() + if success: + deleted.add(slug) + logger.info(f"Deleted package slug: {slug}") + else: + failed.add(slug) + logger.error(f"Failed to delete package slug: {slug}") + + if i + batch_size < len(slugs): + time.sleep(1.1) + + return deleted, failed + # --- Core Logic --- def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknown"): @@ -118,7 +185,7 @@ def get_digest_data(workspace, repo, img, digest, ntag_display, platform="unknow # 1. Fetch Manifest to get Architecture (Only if unknown) if platform == "unknown": manifest_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img}/manifests/{digest}" - manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) + manifest_json = make_request(manifest_url, {"Accept": "application/vnd.oci.image.manifest.v2+json", "Cache-Control": "no-cache"}) if manifest_json: if 'manifests' in manifest_json: @@ -210,9 +277,11 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): parent_status = "Unknown" index_digest = "" + slug = "" if pkg_details and len(pkg_details) > 0: parent_status = pkg_details[0].get('status_str', 'Unknown') + slug = pkg_details[0].get('slug', '') ver = pkg_details[0].get('version', '') if ver and not ver.startswith('sha256:'): index_digest = f"sha256:{ver}" @@ -228,7 +297,8 @@ def fetch_tag_data(workspace, repo, img, ntag, detailed=False): "status": parent_status, "downloads": total_downloads, "digest": index_digest, - "is_child": False + "is_child": False, + "slug": slug }) # Children Data @@ -284,12 +354,14 @@ def fetch_untagged_data(pkg, workspace, repo, img, detailed=False): if detailed: for child in child_digests: - data = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) - results.append(data) + # FIX: get_digest_data returns a dict, not a tuple + row = get_digest_data(workspace, repo, img, child['digest'], "(untagged)", platform=child['platform']) + results.append(row) + results.append("SECTION") return results, slug -def get_untagged_images(workspace, repo, img, delete=False, detailed=False): +def get_untagged_images(workspace, repo, img, delete=False, detailed=False, progress=None): api_url = 
f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/" query = urlencode({'query': f"name:{img}"}) full_url = f"{api_url}?{query}" @@ -306,11 +378,17 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): if not untagged_pkgs: return None + + logger.info(f"Found {len(untagged_pkgs)} untagged manifest lists for image: {img}") # Fetch data first results_map = {} packages_to_delete = [] + task_id = None + if progress: + task_id = progress.add_task(f"[cyan]Analyzing {img}[/cyan] ({len(untagged_pkgs)} untagged)", total=len(untagged_pkgs)) + with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: futures = {executor.submit(fetch_untagged_data, pkg, workspace, repo, img, detailed): i for i, pkg in enumerate(untagged_pkgs)} for future in concurrent.futures.as_completed(futures): @@ -321,29 +399,18 @@ def get_untagged_images(workspace, repo, img, delete=False, detailed=False): packages_to_delete.append(slug) except Exception: pass + + if progress and task_id is not None: + progress.advance(task_id) + + if progress and task_id is not None: + progress.remove_task(task_id) # Perform Deletion if requested deleted_slugs = set() failed_slugs = set() if delete and packages_to_delete: - batch_size = 10 - def delete_pkg_task(slug): - del_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/{slug}/" - return slug, make_request(del_url, method='DELETE') - - for i in range(0, len(packages_to_delete), batch_size): - batch = packages_to_delete[i:i + batch_size] - with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor: - futures = [executor.submit(delete_pkg_task, slug) for slug in batch] - for future in concurrent.futures.as_completed(futures): - slug, success = future.result() - if success: - deleted_slugs.add(slug) - else: - failed_slugs.add(slug) - - if i + batch_size < len(packages_to_delete): - time.sleep(1.1) + deleted_slugs, failed_slugs = batch_delete_packages(workspace, repo, packages_to_delete) # Build Result Groups groups = [] @@ -360,36 +427,68 @@ def delete_pkg_task(slug): action_str = "Failed" for row in rows: - row['action'] = action_str - # Remove internal slug - if 'slug' in row: del row['slug'] + if isinstance(row, dict): + row['action'] = action_str + # Remove internal slug + if 'slug' in row: del row['slug'] groups.append(rows) return groups -def get_image_analysis(workspace, repo, img_name, detailed=False): - tags_url = f"{CLOUDSMITH_URL}/v2/{workspace}/{repo}/{img_name}/tags/list" - tags_json = make_request(tags_url, {"Accept": "application/vnd.oci.image.manifest.v1+json", "Cache-Control": "no-cache"}) +def get_image_analysis(workspace, repo, img_name, delete_all=False, delete_tag=None, detailed=False, progress=None): + # Switch to Cloudsmith API to avoid upstream tags and allow filtering + api_url = f"https://api.cloudsmith.io/v1/packages/{workspace}/{repo}/" - tags = [] - if tags_json: - raw_tags = find_key_recursive(tags_json, 'tags') - flat_tags = [] - for item in raw_tags: - if isinstance(item, list): - flat_tags.extend(item) - else: - flat_tags.append(item) + # Construct query: format:docker AND name:{img_name} (if provided) + query_parts = ["format:docker"] + if img_name: + query_parts.append(f"name:{img_name}") + + query = urlencode({'query': " AND ".join(query_parts)}) + next_url = f"{api_url}?{query}" + + tags = set() + + # Pagination Loop + while next_url: + result = make_request(next_url, {"Cache-Control": "no-cache"}, return_headers=True) + if not result: + break + + data, headers = result - tags 
+        if link_header:
+            links = link_header.split(',')
+            for link in links:
+                if 'rel="next"' in link:
+                    # Format: <url>; rel="next"
+                    next_url = link.split(';')[0].strip('<> ')
+                    break
+
+    sorted_tags = sorted(list(tags))
+
+    if not sorted_tags:
+        logger.info(f"No tags found for image: {img_name}")
         return None
 
     groups = []
+
+    task_id = None
+    if progress:
+        task_id = progress.add_task(f"[cyan]Analyzing {img_name}[/cyan] ({len(sorted_tags)} tags)", total=len(sorted_tags))
+
     with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-        future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in tags}
+        future_to_tag = {executor.submit(fetch_tag_data, workspace, repo, img_name, t, detailed): t for t in sorted_tags}
 
         results = {}
         for future in concurrent.futures.as_completed(future_to_tag):
@@ -398,21 +497,71 @@
                 results[tag] = future.result()
             except Exception:
                 pass
+
+            if progress and task_id is not None:
+                progress.advance(task_id)
 
-    for t in tags:
+    for t in sorted_tags:
         if t in results:
             groups.append(results[t])
+
+    if progress and task_id is not None:
+        progress.remove_task(task_id)
+
+    # Deletion Logic for Tagged Images
+    packages_to_delete = []
+    for group in groups:
+        if not group: continue
+        parent = group[0]
+        # Only delete manifest lists
+        if parent.get('type') == 'manifest/list':
+            should_delete = False
+            if delete_all:
+                should_delete = True
+            elif delete_tag and parent.get('tag') == delete_tag:
+                should_delete = True
+
+            if should_delete and parent.get('slug'):
+                packages_to_delete.append(parent['slug'])
+
+    deleted_slugs = set()
+    failed_slugs = set()
+    if packages_to_delete:
+        logger.info(f"Identified {len(packages_to_delete)} tagged packages to delete for image: {img_name}")
+        deleted_slugs, failed_slugs = batch_delete_packages(workspace, repo, packages_to_delete)
+
+    # Update Action Status in Groups
+    for group in groups:
+        if not group: continue
+        parent = group[0]
+        slug = parent.get('slug')
+
+        action_str = ""
+        if slug in deleted_slugs:
+            action_str = "Deleted"
+        elif slug in failed_slugs:
+            action_str = "Failed"
+
+        if action_str:
+            # Propagate the action to every row in the group (parent and children)
+            for row in group:
+                if isinstance(row, dict):
+                    row['action'] = action_str
 
     return groups
 
-def process_image(org, repo, img_name, args):
+def process_image(org, repo, img_name, args, progress=None):
     if args.untagged or args.untagged_delete:
-        return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed)
+        return get_untagged_images(org, repo, img_name, delete=args.untagged_delete, detailed=args.detailed, progress=progress)
     else:
-        return get_image_analysis(org, repo, img_name, detailed=args.detailed)
+        return get_image_analysis(org, repo, img_name, delete_all=args.delete_all, delete_tag=args.delete_tag, detailed=args.detailed, progress=progress)
 
 def render_table(image_name, groups, is_untagged=False, has_action=False):
-    title = f"Untagged Manifest Lists: {image_name}" if is_untagged else f"Image Analysis: {image_name}"
-    table = Table(title=title, box=box.ROUNDED)
+    # --- Table Setup ---
+    table = Table(title=f"{'Untagged' if is_untagged else 'Tagged'} Image Analysis: {image_name}", box=box.ROUNDED)
     table.add_column("Tag", style="cyan")
     table.add_column("Type", style="magenta")
     table.add_column("Platform")
@@ -422,40 +571,81 @@ def render_table(image_name, groups, is_untagged=False, has_action=False):
     if has_action:
         table.add_column("Action", style="bold red")
 
-    for i, group in enumerate(groups):
-        if i > 0:
-            table.add_section()
+    # --- Row Rendering ---
+    for group in groups:
+        if not group: continue
+        parent = group[0]
 
-        for row in group:
-            # Format for Table
-            tag_display = row['tag']
-            if row['is_child']:
-                tag_display = f"  └─ {row['tag']}"
-            else:
-                tag_display = f"[bold cyan]{row['tag']}[/bold cyan]"
-
-            type_display = row['type']
-            if type_display == 'manifest/list':
-                type_display = "[magenta]manifest/list[/magenta]"
-
-            status_display = format_status(row['status'])
-
-            dl_display = str(row['downloads'])
-            if row['type'] == 'manifest/list':
-                dl_display = f"[green]{dl_display}[/green]"
-
-            digest_display = f"[dim]{row['digest']}[/dim]"
-
-            row_data = [tag_display, type_display, row['platform'], status_display, dl_display, digest_display]
-
-            if has_action:
-                row_data.append(row.get('action', ''))
-
-            table.add_row(*row_data)
+        # Action string for delete status
+        action_str = parent.get('action', "")
+
+        # Parent Row
+        if is_untagged:
+            row_data = [
+                parent.get("tag", ""),
+                parent.get("type", ""),
+                parent.get("platform", ""),
+                format_status(parent.get("status", "")),
+                f"[green]{parent.get('downloads', 0)}[/green]",
+                f"[dim]{parent.get('digest', '')}[/dim]"
+            ]
+        else:
+            row_data = [
+                f"[bold cyan]{parent.get('tag', '')}[/bold cyan]",
+                f"[magenta]{parent.get('type') or 'manifest/list'}[/magenta]",
+                parent.get("platform") or "multi",
+                format_status(parent.get("status", "")),
+                f"[green]{parent.get('downloads', 0)}[/green]",
+                f"[dim]{parent.get('digest', '')}[/dim]"
+            ]
+        if has_action:
+            row_data.append(action_str)
+
+        table.add_row(*row_data)
+
+        # Child Rows
+        if len(group) > 1:
+            for row in group[1:]:
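+                # "SECTION" is a sentinel row marking the end of an untagged manifest group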
+                if row == "SECTION":
+                    table.add_section()
+                else:
+                    row_data = [
+                        f"  └─ {row.get('tag', '')}",
+                        row.get("type", ""),
+                        row.get("platform", ""),
+                        format_status(row.get("status", "")),
+                        f"[green]{row.get('downloads', 0)}[/green]",
+                        f"[dim]{row.get('digest', '')}[/dim]"
+                    ]
+                    if has_action:
+                        row_data.append(row.get('action', ''))
+
+                    table.add_row(*row_data)
 
     return table
 
 def main():
+    # Parse args first to configure logging
+    parser = argparse.ArgumentParser(description="Docker Multi-Arch Inspector")
+    parser.add_argument("org", help="Cloudsmith Organization/User")
+    parser.add_argument("repo", help="Cloudsmith Repository")
+    parser.add_argument("img", nargs="?", help="Image Name (Optional - if omitted, scans all images)")
+    parser.add_argument("--untagged", action="store_true", help="Find untagged manifest lists")
+    parser.add_argument("--untagged-delete", action="store_true", help="Delete untagged manifest lists")
+    parser.add_argument("--delete-all", action="store_true", help="Delete ALL detected manifest lists")
+    parser.add_argument("--delete-tag", help="Delete manifest lists matching this specific tag")
+    parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests")
+    parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)")
+    parser.add_argument("--debug-log", action="store_true", help="Enable debug logging to file")
+
+    args = parser.parse_args()
+
+    # Re-configure logging based on args
+    global logger
+    logger = setup_logging(args.debug_log)
+
+    logger.info("--- Script Started ---")
     console.print(r"""[bold cyan]
  ██████╗██╗      ██████╗ ██╗   ██╗██████╗ ███████╗███╗   ███╗██╗████████╗██╗  ██╗
 ██╔════╝██║     ██╔═══██╗██║   ██║██╔══██╗██╔════╝████╗ ████║██║╚══██╔══╝██║  ██║
@@ -478,10 +668,14 @@ def main():
     parser.add_argument("img", nargs="?", help="Image Name (Optional - if omitted, scans all images)")
     parser.add_argument("--untagged", action="store_true", help="Find untagged manifest lists")
     parser.add_argument("--untagged-delete", action="store_true", help="Delete untagged manifest lists")
+    parser.add_argument("--delete-all", action="store_true", help="Delete ALL detected manifest lists")
+    parser.add_argument("--delete-tag", help="Delete manifest lists matching this specific tag")
     parser.add_argument("--detailed", action="store_true", help="Show detailed breakdown of digests")
     parser.add_argument("--output", choices=['table', 'json', 'csv'], default='table', help="Output format (default: table)")
+    parser.add_argument("--debug-log", action="store_true", help="Enable debug logging to file")
 
     args = parser.parse_args()
+    logger.info(f"Arguments: {args}")
 
     images_to_scan = []
 
@@ -490,14 +684,19 @@ def main():
     else:
         if args.output == 'table':
             console.print(f"[bold]Fetching catalog for {args.org}/{args.repo}...[/bold]")
+
+        logger.info(f"Fetching catalog for {args.org}/{args.repo}")
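+        # The registry v2 _catalog endpoint lists every image name in the repository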
         catalog_url = f"{CLOUDSMITH_URL}/v2/{args.org}/{args.repo}/_catalog"
         catalog_json = make_request(catalog_url, {"Accept": "application/json", "Cache-Control": "no-cache"})
 
         if catalog_json and 'repositories' in catalog_json:
             images_to_scan = catalog_json['repositories']
+            logger.info(f"Found {len(images_to_scan)} images in catalog.")
         else:
+            msg = "Failed to fetch catalog or no images found."
             if args.output == 'table':
-                console.print("[red]Failed to fetch catalog or no images found.[/red]")
+                console.print(f"[red]{msg}[/red]")
+            logger.error(msg)
             sys.exit(1)
 
     # Only show progress bar for table output
@@ -516,6 +715,7 @@ def __enter__(self): return self
             def __exit__(self, *args): pass
             def add_task(self, *args, **kwargs): return None
             def advance(self, *args, **kwargs): pass
+            def remove_task(self, *args, **kwargs): pass
             @property
             def console(self): return console  # fallback
         progress_ctx = DummyProgress()
@@ -530,7 +730,7 @@ def console(self): return console # fallback
     executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
     try:
         future_to_img = {
-            executor.submit(process_image, args.org, args.repo, img, args): img
+            executor.submit(process_image, args.org, args.repo, img, args, progress=progress): img
             for img in images_to_scan
         }
 
@@ -541,19 +741,22 @@ def console(self): return console # fallback
                 if groups:
                     collected_results.append((img_name, groups))
             except Exception as e:
+                logger.error(f"Error processing {img_name}: {e}")
                 if args.output == 'table':
                     progress.console.print(f"[red]Error processing {img_name}: {e}[/red]")
 
             if args.output == 'table':
                 progress.advance(task)
 
+        # Normal shutdown
         executor.shutdown(wait=True)
     except KeyboardInterrupt:
+        # Force shutdown without waiting
        executor.shutdown(wait=False, cancel_futures=True)
         raise
 
-    # Sort results by image name
+    # Sort results by image name and print
     collected_results.sort(key=lambda x: x[0])
 
     if not collected_results:
@@ -561,6 +764,7 @@ def console(self): return console # fallback
             console.print("[yellow]No matching images or tags found.[/yellow]")
         elif args.output == 'json':
             print("[]")
+        logger.info("No matching images or tags found.")
        return
 
    # --- Output Handling ---
@@ -568,48 +772,47 @@ def console(self): return console # fallback
     if args.output == 'table':
         for img_name, groups in collected_results:
             is_untagged = args.untagged or args.untagged_delete
-            has_action = args.untagged_delete
-            table = render_table(img_name, groups, is_untagged, has_action)
+            has_action = args.untagged_delete or args.delete_all or (args.delete_tag is not None)
+            table = render_table(image_name=img_name, groups=groups, is_untagged=is_untagged, has_action=has_action)
             console.print(table)
             console.print("")
-
     elif args.output == 'json':
-        # Flatten structure for JSON: List of objects, each with 'image' field
-        json_output = []
+        # JSON Output for all images
+        all_results = {}
         for img_name, groups in collected_results:
-            for group in groups:
-                for row in group:
-                    row_copy = row.copy()
-                    row_copy['image'] = img_name
-                    json_output.append(row_copy)
-        print(json.dumps(json_output, indent=2))
-
+            all_results[img_name] = groups
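+
+        # Each group is serialized as-is, including child rows and "SECTION" markers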
+        json_output = json.dumps(all_results, indent=2)
+        print(json_output)
     elif args.output == 'csv':
-        # Flatten structure for CSV
-        csv_rows = []
-        fieldnames = ['image', 'tag', 'type', 'platform', 'status', 'downloads', 'digest', 'is_child', 'action']
+        # CSV Output (simple flat structure)
+        csv_lines = []
+        csv_lines.append(["Image", "Tag", "Type", "Platform", "Status", "Downloads", "Digest", "Action"])  # Header
 
         for img_name, groups in collected_results:
             for group in groups:
                 for row in group:
-                    row_copy = row.copy()
-                    row_copy['image'] = img_name
-                    # Ensure all fields exist
-                    for f in fieldnames:
-                        if f not in row_copy:
-                            row_copy[f] = ''
-                    csv_rows.append(row_copy)
+                    # Skip "SECTION" sentinel rows used only as table dividers
+                    if row == "SECTION":
+                        continue
+                    # Flat CSV row
+                    csv_lines.append([
+                        img_name,
+                        row.get("tag", ""),
+                        row.get("type", ""),
+                        row.get("platform", ""),
+                        row.get("status", ""),
+                        str(row.get("downloads", 0)),
+                        row.get("digest", ""),
+                        row.get("action", "")
+                    ])
 
-        writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(csv_rows)
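+        # csv.writer handles quoting and escaping of embedded commas and quotes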
group.get("platform", ""), + group.get("status", ""), + str(group.get("downloads", 0)), + group.get("digest", ""), + group.get("action", "") + ]) - writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(csv_rows) + # Print CSV + for line in csv_lines: + console.print(",".join(f'"{str(item)}"' for item in line)) if __name__ == "__main__": try: main() except KeyboardInterrupt: - if 'console' in globals(): - console.print("\n[bold red]Operation cancelled by user.[/bold red]") - else: - print("\nOperation cancelled by user.") - sys.exit(1) + console.print("\n[bold red]Operation cancelled by user.[/bold red]") + # Use os._exit to avoid hanging on shutdown + os._exit(0)