From 966357ace2446a8d53440e655173fa55692b9792 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 12 May 2025 13:53:46 -0700 Subject: [PATCH 01/10] runner.conda: Configure Micromamba to use the same CA certificates as requests This makes it easier to configure alternative CAs in development/testing in a way that's consistent across components. By default Micromamba (1.5.8) uses the system CA certificates, the sourcing of which varies depending on TLS backend. Micromamba (or really, libmamba) attempts to respect REQUESTS_CA_BUNDLE explicitly itself, but the code appears to contain a logic error that means it's bypassed. And it does not try to fall back to respecting CURL_CA_BUNDLE as requests does. The documentation also says that --cacert-path may be a directory, but the code supports only files. --- nextstrain/cli/requests.py | 5 +++++ nextstrain/cli/runner/conda.py | 5 +++++ setup.py | 1 + 3 files changed, 11 insertions(+) diff --git a/nextstrain/cli/requests.py b/nextstrain/cli/requests.py index 92aa3545..05c1a7bd 100644 --- a/nextstrain/cli/requests.py +++ b/nextstrain/cli/requests.py @@ -21,6 +21,7 @@ aggregate usage metrics, so we do not recommend omitting it unless necessary. """ +import certifi import os import platform import requests @@ -40,6 +41,10 @@ USER_AGENT_MINIMAL = bool(os.environ.get("NEXTSTRAIN_CLI_USER_AGENT_MINIMAL")) +CA_BUNDLE = os.environ.get("REQUESTS_CA_BUNDLE") \ + or os.environ.get("CURL_CA_BUNDLE") \ + or certifi.where() + class Session(requests.Session): def __init__(self): diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 773067c2..62dfee54 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -346,6 +346,11 @@ def micromamba(*args, add_prefix: bool = True) -> None: # explicit here. "--allow-uninstall", "--allow-downgrade", + + # Honor same method of CA certificate overriding as requests, + # except without support for cert directories (only files). 
+ *(["--cacert-path", requests.CA_BUNDLE] + if not Path(requests.CA_BUNDLE).is_dir() else []), ) env = { diff --git a/setup.py b/setup.py index a27dfbeb..6eae2d49 100644 --- a/setup.py +++ b/setup.py @@ -88,6 +88,7 @@ def find_namespaced_packages(namespace): python_requires = '>=3.8', install_requires = [ + "certifi", "docutils", "fasteners", "importlib_resources >=5.3.0; python_version < '3.11'", From b42d9ca80ca8c2533741eef73968402b6baa24a6 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 12 May 2025 13:36:06 -0700 Subject: [PATCH 02/10] runner.conda: Use of Anaconda Inc. services is no longer required MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for the "channel_alias" Conda config setting¹ so unencumbered mirrors of the open-source, community-led channels (e.g. conda-forge, bioconda) can be used instead of conda.anaconda.org. Drops reliance on api.anaconda.org to query package info in favor of using `micromamba repoquery search` to query channels directly. The new-ish Anaconda license terms² are untenable for many organizations, including Fred Hutch. Avoiding use of licensed resources³ can be achieved thru client (e.g. conda, mamba, etc) configuration and/or network-level blocks (e.g. blocking ). Fred Hutch, for example, has chosen⁴ the latter with a two-pronged solution: local mirroring of the conda-forge and bioconda channels combined with proxied (i.e. controlled) access to conda.anaconda.org via conda-forge.fredhutch.org. Resolves: ¹ ² ³ --- CHANGES.md | 22 +++++ nextstrain/cli/runner/conda.py | 174 ++++++++++++++++++++------------- 2 files changed, 126 insertions(+), 70 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 919b43b7..6c8850b7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,28 @@ development source code and as such may not be routinely kept up to date. 
# __NEXT__ +## Improvements + +* Use of an alternate Conda package repository is now supported during + `nextstrain setup conda` and `nextstrain update conda` if you cannot or do + not want to use the default package repository hosted by Anaconda. Set the + [`NEXTSTRAIN_CONDA_CHANNEL_ALIAS`][] environment variable to the base URL of + the repository. This corresponds to the [`channel_alias` Conda config + setting][]. + ([#436](https://github.com/nextstrain/cli/pull/436)) + +[`NEXTSTRAIN_CONDA_CHANNEL_ALIAS`]: https://docs.nextstrain.org/projects/cli/en/__NEXT__/runtimes/conda/#envvar-NEXTSTRAIN_CONDA_CHANNEL_ALIAS +[`channel_alias` Conda config setting]: https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/settings.html#set-ch-alias + +## Development + +* The `NEXTSTRAIN_CONDA_MICROMAMBA_VERSION` environment variable is no longer + supported (i.e. for use with `nextstrain setup conda`). Please use + [`NEXTSTRAIN_CONDA_MICROMAMBA_URL`][] instead. + ([#436](https://github.com/nextstrain/cli/pull/436)) + +[`NEXTSTRAIN_CONDA_MICROMAMBA_URL`]: https://docs.nextstrain.org/projects/cli/en/__NEXT__/runtimes/conda/#envvar-NEXTSTRAIN_CONDA_MICROMAMBA_URL + # 10.0.0 (7 May 2025) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 62dfee54..7c141262 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -38,8 +38,24 @@ Environment variables ===================== +.. envvar:: NEXTSTRAIN_CONDA_CHANNEL_ALIAS + + The base URL to prepend to channel names. Equivalent to the |channel_alias + Conda config setting|_. + + Useful if you want to use a Conda package mirror that's not the default + (i.e. not Anaconda's). + + Defaults to the Conda ecosystem's default of + ``__. + +.. |channel_alias Conda config setting| replace:: ``channel_alias`` Conda config setting +.. _channel_alias Conda config setting: https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/settings.html#set-ch-alias + + .. 
warning:: - For development only. You don't need to set these during normal operation. + The remaining variables are for development only. You don't need to set + these during normal operation. .. envvar:: NEXTSTRAIN_CONDA_CHANNEL @@ -60,28 +76,33 @@ .. _Conda package match spec: https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/pkg-specs.html#package-match-specifications -.. envvar:: NEXTSTRAIN_CONDA_MICROMAMBA_VERSION +.. envvar:: NEXTSTRAIN_CONDA_MICROMAMBA_URL + + URL of a Micromamba release tarball (e.g. Conda package) to use for setup + and updates. + + May be a full URL or a relative URL to be joined with + :envvar:`NEXTSTRAIN_CONDA_CHANNEL_ALIAS`. Any occurrence of ``{subdir}`` + will be replaced with the current platform's Conda subdir value. - Version of Micromamba to use for setup and upgrade of the Conda runtime - env. Must be a version available from the `conda-forge channel - `__, or the special string - ``latest``. + Replaces the previously-supported development environment variable + ``NEXTSTRAIN_CONDA_MICROMAMBA_VERSION``. - Defaults to ``1.5.8``. + Defaults to ``conda-forge/{subdir}/micromamba-1.5.8-0.tar.bz2``. """ import json import os import platform -import re import shutil import subprocess import sys import tarfile import traceback from pathlib import Path, PurePosixPath -from typing import Iterable, NamedTuple, Optional, cast -from urllib.parse import urljoin, quote as urlquote +from tempfile import TemporaryFile +from typing import IO, Iterable, NamedTuple, Optional, cast +from urllib.parse import urljoin from .. import config from .. import requests from ..errors import InternalError @@ -99,8 +120,11 @@ MICROMAMBA = MICROMAMBA_ROOT / "bin/micromamba" # If you update the version pin below, please update the docstring above too. 
-MICROMAMBA_VERSION = os.environ.get("NEXTSTRAIN_CONDA_MICROMAMBA_VERSION") \ - or "1.5.8" +MICROMAMBA_URL = os.environ.get("NEXTSTRAIN_CONDA_MICROMAMBA_URL") \ + or "conda-forge/{subdir}/micromamba-1.5.8-0.tar.bz2" + +CHANNEL_ALIAS = os.environ.get("NEXTSTRAIN_CONDA_CHANNEL_ALIAS") \ + or "https://conda.anaconda.org" NEXTSTRAIN_CHANNEL = os.environ.get("NEXTSTRAIN_CONDA_CHANNEL") \ or "nextstrain" @@ -194,23 +218,18 @@ def setup_micromamba(dry_run: bool = False, force: bool = False) -> bool: if not dry_run: shutil.rmtree(str(MICROMAMBA_ROOT)) - # Query for Micromamba release try: - dist = package_distribution("conda-forge", "micromamba", MICROMAMBA_VERSION) + subdir = platform_subdir() except InternalError as err: warn(err) return False - assert dist, f"unable to find micromamba dist" - - # download_url is scheme-less, so add our preferred scheme but in a way - # that won't break if it starts including a scheme later. - dist_url = urljoin("https:", dist["download_url"]) + url = urljoin(CHANNEL_ALIAS, MICROMAMBA_URL.replace('{subdir}', subdir)) - print(f"Requesting Micromamba from {dist_url}…") + print(f"Requesting Micromamba from {url}…") if not dry_run: - response = requests.get(dist_url, stream = True) + response = requests.get(url, stream = True) response.raise_for_status() content_type = response.headers["Content-Type"] @@ -291,7 +310,7 @@ def setup_prefix(dry_run: bool = False, force: bool = False) -> bool: return True -def micromamba(*args, add_prefix: bool = True) -> None: +def micromamba(*args, stdout: IO[bytes] = None, add_prefix: bool = True) -> None: """ Runs our installed Micromamba with appropriate global options and options for prefix and channel selection. @@ -303,6 +322,9 @@ def micromamba(*args, add_prefix: bool = True) -> None: For convenience, all arguments are converted to strings before being passed to :py:func:`subprocess.run`. 
+ Set the keyword-only argument *stdout* to a binary file-like object (with a + file descriptor) to redirect the process's stdout. + Set the keyword-only argument *add_prefix* to false to omit the ``--prefix`` option and channel-related options which are otherwise automatically added. @@ -332,9 +354,9 @@ def micromamba(*args, add_prefix: bool = True) -> None: # own channel. "--override-channels", "--strict-channel-priority", - "--channel", NEXTSTRAIN_CHANNEL, - "--channel", "conda-forge", - "--channel", "bioconda", + "--channel", urljoin(CHANNEL_ALIAS, NEXTSTRAIN_CHANNEL), + "--channel", urljoin(CHANNEL_ALIAS, "conda-forge"), + "--channel", urljoin(CHANNEL_ALIAS, "bioconda"), # Don't automatically pin Python so nextstrain-base deps can change # it on upgrade. @@ -391,7 +413,7 @@ def micromamba(*args, add_prefix: bool = True) -> None: } try: - subprocess.run(argv, env = env, check = True) + subprocess.run(argv, env = env, stdout = stdout, check = True) except (OSError, subprocess.CalledProcessError) as err: raise InternalError(f"Error running {argv!r}") from err @@ -569,12 +591,7 @@ def package_version(spec: str) -> str: version = meta.get("version", "unknown") build = meta.get("build", "unknown") - channel = meta.get("channel", "unknown") - - anaconda_channel = re.search(r'^https://conda[.]anaconda[.]org/(?P.+?)/(?:linux|osx)-64$', channel) - - if anaconda_channel: - channel = anaconda_channel["repo"] + channel = meta.get("channel", "unknown") # full URL; includes subdir return f"{name} {version} ({build}, {channel})" @@ -589,56 +606,51 @@ def package_meta(spec: str) -> Optional[dict]: return json.loads(metafile.read_bytes()) -def package_distribution(channel: str, package: str, version: str = None, label: str = "main") -> Optional[dict]: - # If *package* is a package spec, convert it just to a name. 
- package = package_name(package) - - if version is None: - version = latest_package_label_version(channel, package, label) - if version is None: - warn(f"Could not find latest version of package {package!r} with label {label!r}.", - "\nUsing 'latest' version instead, which will be the latest version of the package regardless of label.") - version = "latest" - - response = requests.get(f"https://api.anaconda.org/release/{urlquote(channel)}/{urlquote(package)}/{urlquote(version)}") - response.raise_for_status() +def package_distribution(channel: str, spec: str) -> Optional[dict]: + with TemporaryFile() as tmp: + micromamba( + "repoquery", "search", spec, - dists = response.json().get("distributions", []) + # Channel (repo) to search + "--override-channels", + "--strict-channel-priority", + "--channel", urljoin(CHANNEL_ALIAS, channel), - system = platform.system() - machine = platform.machine() + # Always check that we have latest package index + "--repodata-ttl", 0, - if (system, machine) == ("Linux", "x86_64"): - subdir = "linux-64" - elif (system, machine) in {("Darwin", "x86_64"), ("Darwin", "arm64")}: - # Use the x86 arch even on arm (https://docs.nextstrain.org/en/latest/reference/faq.html#why-intel-miniconda-installer-on-apple-silicon) - subdir = "osx-64" - else: - raise InternalError(f"Unsupported system/machine: {system}/{machine}") + # Emit JSON so we can process it + "--json", - # Releases have other attributes related to system/machine, but they're - # informational-only and subdir is what Conda *actually* uses to - # differentiate distributions/files/etc. Use it too so we have the same - # view of reality. - subdir_dists = (d for d in dists if d.get("attrs", {}).get("subdir") == subdir) - dist = max(subdir_dists, default=None, key=lambda d: d.get("attrs", {}).get("build_number", 0)) + # Honor same method of CA certificate overriding as requests, + # except without support for cert directories (only files). 
+ *(["--cacert-path", requests.CA_BUNDLE] + if not Path(requests.CA_BUNDLE).is_dir() else []), - return dist + add_prefix = False, + stdout = tmp) + tmp.seek(0) -def package_name(spec: str) -> str: - return PackageSpec.parse(spec).name + result = json.load(tmp).get("result", {}) + assert (status := result.get("status")) == "OK", \ + f"repoquery {status=}, not OK" -def latest_package_label_version(channel: str, package: str, label: str) -> Optional[str]: - response = requests.get(f"https://api.anaconda.org/package/{urlquote(channel)}/{urlquote(package)}/files") - response.raise_for_status() + dists = result.get("pkgs", []) - label_files = (file for file in response.json() if label in file.get("labels", [])) # Default '0-dev' should be the lowest version according to PEP440 # See https://peps.python.org/pep-0440/#summary-of-permitted-suffixes-and-relative-ordering - latest_file: dict = max(label_files, default={}, key=lambda file: parse_version_lax(file.get('version', '0-dev'))) - return latest_file.get("version") + dist = max(dists, default = None, key = lambda d: (parse_version_lax(d.get("version", "0-dev")), d.get("build_number", 0))) + + if not dist: + return None + + return dist + + +def package_name(spec: str) -> str: + return PackageSpec.parse(spec).name class PackageSpec(NamedTuple): @@ -664,3 +676,25 @@ def parse(spec): return PackageSpec(parts[0], parts[1], None) except IndexError: return PackageSpec(parts[0], None, None) + + +def platform_subdir() -> str: + """ + Conda subdir to use for the :mod:`platform` on which we're running. + + One of ``linux-64``, ``osx-64``, or ``osx-arm64``. + + Raises an :exc:`InternalError` if the platform is currently unsupported. 
+ """ + system = platform.system() + machine = platform.machine() + + if (system, machine) == ("Linux", "x86_64"): + subdir = "linux-64" + elif (system, machine) in {("Darwin", "x86_64"), ("Darwin", "arm64")}: + # Use the x86 arch even on arm (https://docs.nextstrain.org/en/latest/reference/faq.html#why-intel-miniconda-installer-on-apple-silicon) + subdir = "osx-64" + else: + raise InternalError(f"Unsupported system/machine: {system}/{machine}") + + return subdir From c96395c6a4912bec053cf9efdbe29ee6df091bf4 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 19 May 2025 10:47:16 -0700 Subject: [PATCH 03/10] runner.conda: Stop checking Content-Type for Micromamba download MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just try to open it as a tar archive (possibly compressed) and handle the error if it's not readable. Now that we're not always fetching from a single expected source (i.e. Anaconda), this avoids issues with different mirrors reporting different Content-Types (not to mention responses which claim to be an expected Content-Type but aren't actually). We may still want to use Content-Type to dispatch to different format handlers in the future, but right now we only handle tar. Motivated by @victorlin's review.¹ ¹ --- nextstrain/cli/runner/conda.py | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 7c141262..604e4f41 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -105,7 +105,7 @@ from urllib.parse import urljoin from .. import config from .. 
import requests -from ..errors import InternalError +from ..errors import InternalError, UserError from ..paths import RUNTIMES from ..types import Env, RunnerModule, SetupStatus, SetupTestResults, UpdateStatus from ..util import capture_output, colored, exec_or_return, parse_version_lax, runner_name, setup_tests_ok, test_rosetta_enabled, warn @@ -233,21 +233,24 @@ def setup_micromamba(dry_run: bool = False, force: bool = False) -> bool: response.raise_for_status() content_type = response.headers["Content-Type"] - assert content_type == "application/x-tar", \ - f"unknown content-type for micromamba dist: {content_type}" - - with tarfile.open(fileobj = response.raw, mode = "r|*") as tar: - # Ignore archive members starting with "/" and or including ".." parts, - # as these can be used (maliciously or accidentally) to overwrite - # unintended files (e.g. files outside of MICROMAMBA_ROOT). - safe_members = ( - member - for member in tar - if not member.name.startswith("/") - and ".." not in PurePosixPath(member.name).parts) - - print(f"Downloading and extracting Micromamba to {MICROMAMBA_ROOT}…") - tar.extractall(path = str(MICROMAMBA_ROOT), members = safe_members) + try: + with tarfile.open(fileobj = response.raw, mode = "r|*") as tar: + # Ignore archive members starting with "/" and or including ".." parts, + # as these can be used (maliciously or accidentally) to overwrite + # unintended files (e.g. files outside of MICROMAMBA_ROOT). + safe_members = ( + member + for member in tar + if not member.name.startswith("/") + and ".." 
not in PurePosixPath(member.name).parts) + + print(f"Downloading and extracting Micromamba to {MICROMAMBA_ROOT}…") + tar.extractall(path = str(MICROMAMBA_ROOT), members = safe_members) + + except tarfile.TarError as err: + raise UserError(f""" + Failed to extract {url} (Content-Type: {content_type}) as tar archive: {err} + """) else: print(f"Downloading and extracting Micromamba to {MICROMAMBA_ROOT}…") From 405df568ec3a9d685f47e05822101727a22adf3b Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 14 May 2025 11:05:40 -0700 Subject: [PATCH 04/10] runner.conda: Drop sorting by build_number and note why we ignore build variants The sorting by build number originally came about when we were still using the package_distribution() function to find the Micromamba dist to download. As Micromamba had used "fixup" builds in the past, it seemed useful/important to include build number in the sort. However, builds can also be used for "variants", e.g. differing dependencies, and resolving which "variant" build to use requires the solver not a simple sort. We don't need to support the way Micromamba used/uses builds anymore, and since our usage of multiple builds for nextstrain-base is likely to be for variants, we can't properly decide which to use anyway. --- nextstrain/cli/runner/conda.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 604e4f41..c93c6418 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -514,6 +514,20 @@ def update() -> UpdateStatus: """ Update all installed packages with Micromamba. """ + # In the comparisons and logic below, we handle selecting the version to + # update to but still let Micromamba select the specific package _build_ to + # use. 
While our package creation automation currently doesn't support + # multiple builds of a version, it's worth noting that 1) Conda's data + # model allows for it, and 2) we may start producing multiple builds in the + # future (e.g. for varying x86_64-microarch-level dependencies¹ or other + # platform compatibility reasons). If we do, the code below should still + # work fine. However, if we start making "fixup" builds of existing + # versions (e.g. build 1 of version X after build 0 of version X), the "do + # we need to update?" logic below would not deal with them properly. + # -trs, 9 April 2025 & 13 May 2025 + # + # ¹ + current_version = (package_meta(NEXTSTRAIN_BASE) or {}).get("version") # We accept a package match spec, which one to three space-separated parts.¹ @@ -642,9 +656,15 @@ def package_distribution(channel: str, spec: str) -> Optional[dict]: dists = result.get("pkgs", []) - # Default '0-dev' should be the lowest version according to PEP440 - # See https://peps.python.org/pep-0440/#summary-of-permitted-suffixes-and-relative-ordering - dist = max(dists, default = None, key = lambda d: (parse_version_lax(d.get("version", "0-dev")), d.get("build_number", 0))) + # Default '0-dev' should be the lowest version according to PEP440.¹ + # + # We're intentionally ignoring build number as we let Micromamba sort out + # the best build variant for a given version of our nextstrain-base + # package. We currently do not produce multiple builds per version, but we + # may in the future. See also the comment at the top of update(). + # + # ¹ + dist = max(dists, default = None, key = lambda d: parse_version_lax(d.get("version", "0-dev"))) if not dist: return None From 3d149fd69e49f584ba1da5108d18b800861d1afa Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 14 May 2025 11:02:25 -0700 Subject: [PATCH 05/10] runner.conda: Simplify handling of NEXTSTRAIN_BASE Removes special-casing for when NEXTSTRAIN_BASE is a package spec (vs. 
a plain package name) and removes a fall back code path that existed as a hedge against our correct handling of specs. Now that package_distribution() uses `micromamba repoquery search`, it can handle both plain package names and full package specs. Introduces a tiny PackageDistribution data structure in order to be explicit about what info we use (and what keys *must* exist). --- nextstrain/cli/runner/conda.py | 64 ++++++++++++---------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index c93c6418..7bf6fa11 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -271,22 +271,10 @@ def setup_prefix(dry_run: bool = False, force: bool = False) -> bool: if not dry_run: shutil.rmtree(str(PREFIX)) - # We accept a package match spec, which one to three space-separated parts.¹ - # If we got a spec, then we use it as-is. - # - # ¹ - # - if " " in NEXTSTRAIN_BASE.strip(): - install_spec = NEXTSTRAIN_BASE + if install_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE): + install_spec = f"{install_dist.name} =={install_dist.version}" else: - latest_version = (package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE) or {}).get("version") - - if latest_version: - install_spec = f"{NEXTSTRAIN_BASE} =={latest_version}" - else: - warn(f"Unable to find latest version of {NEXTSTRAIN_BASE} package; falling back to non-specific install") - - install_spec = NEXTSTRAIN_BASE + raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") # Create environment print(f"Installing Conda packages into {PREFIX}…") @@ -528,37 +516,26 @@ def update() -> UpdateStatus: # # ¹ - current_version = (package_meta(NEXTSTRAIN_BASE) or {}).get("version") - - # We accept a package match spec, which one to three space-separated parts.¹ - # If we got a spec, then we need to handle updates a bit differently. 
- # - # ¹ - # - if " " in NEXTSTRAIN_BASE.strip(): - pkg = PackageSpec.parse(NEXTSTRAIN_BASE) - print(colored("bold", f"Updating {pkg.name} from {current_version} to {pkg.version_spec}…")) - update_spec = NEXTSTRAIN_BASE + nextstrain_base = PackageSpec.parse(NEXTSTRAIN_BASE) - else: - latest_version = (package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE) or {}).get("version") + current_version = (package_meta(NEXTSTRAIN_BASE) or {}).get("version") - if latest_version: - if latest_version == current_version: - print(f"Conda package {NEXTSTRAIN_BASE} {current_version} already at latest version") - print() - return True + if latest_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE): + assert latest_dist.name == nextstrain_base.name - print(colored("bold", f"Updating Conda package {NEXTSTRAIN_BASE} from {current_version} to {latest_version}…")) + latest_version = latest_dist.version - update_spec = f"{NEXTSTRAIN_BASE} =={latest_version}" + if latest_version == current_version: + print(f"Conda package {nextstrain_base.name} {current_version} already at latest version") + print() + return True - else: - warn(f"Unable to find latest version of {NEXTSTRAIN_BASE} package; falling back to non-specific update") + print(colored("bold", f"Updating Conda package {nextstrain_base.name} from {current_version} to {latest_version}…")) - print(colored("bold", f"Updating Conda package {NEXTSTRAIN_BASE} from {current_version}…")) + update_spec = f"{nextstrain_base.name} =={latest_version}" - update_spec = NEXTSTRAIN_BASE + else: + raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") print() print(f"Updating Conda packages in {PREFIX}…") @@ -623,7 +600,7 @@ def package_meta(spec: str) -> Optional[dict]: return json.loads(metafile.read_bytes()) -def package_distribution(channel: str, spec: str) -> Optional[dict]: +def package_distribution(channel: str, spec: str) -> Optional['PackageDistribution']: with TemporaryFile() 
as tmp: micromamba( "repoquery", "search", spec, @@ -669,7 +646,12 @@ def package_distribution(channel: str, spec: str) -> Optional[dict]: if not dist: return None - return dist + return PackageDistribution(dist["name"], dist["version"]) + + +class PackageDistribution(NamedTuple): + name: str + version: str def package_name(spec: str) -> str: From 09e488f17808cb0b2ede1d8000501ba4c79df3d2 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 12 May 2025 13:53:16 -0700 Subject: [PATCH 06/10] =?UTF-8?q?runner.conda:=20Explicitly=20handle=20sub?= =?UTF-8?q?dir=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …rather than relying on Micromamba's implicit subdir selection based on the arch it was compiled for. Aside from making explicit something that's important but implicit, this also makes it possible to implement explicit subdir switching/upgrading (e.g. osx-64 → osx-arm64). No significant behaviour change is expected. --- nextstrain/cli/runner/conda.py | 43 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 7bf6fa11..524f89de 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -271,18 +271,18 @@ def setup_prefix(dry_run: bool = False, force: bool = False) -> bool: if not dry_run: shutil.rmtree(str(PREFIX)) - if install_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE): + if install_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, platform_subdir()): install_spec = f"{install_dist.name} =={install_dist.version}" else: raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") # Create environment print(f"Installing Conda packages into {PREFIX}…") - print(f" - {install_spec}") + print(f" - {install_spec} ({install_dist.subdir})") if not dry_run: try: - micromamba("create", install_spec) + 
micromamba("create", install_spec, "--platform", install_dist.subdir) except InternalError as err: warn(err) traceback.print_exc() @@ -518,31 +518,36 @@ def update() -> UpdateStatus: nextstrain_base = PackageSpec.parse(NEXTSTRAIN_BASE) - current_version = (package_meta(NEXTSTRAIN_BASE) or {}).get("version") + current_meta = package_meta(NEXTSTRAIN_BASE) or {} + current_version = current_meta.get("version") + current_subdir = current_meta.get("subdir") or platform_subdir() - if latest_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE): - assert latest_dist.name == nextstrain_base.name + assert current_meta.get("name") in {nextstrain_base.name, None} - latest_version = latest_dist.version + if latest_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, current_subdir): + assert latest_dist.name == nextstrain_base.name + else: + raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") - if latest_version == current_version: - print(f"Conda package {nextstrain_base.name} {current_version} already at latest version") - print() - return True + latest_version = latest_dist.version + if latest_version == current_version: + print(f"Conda package {nextstrain_base.name} {current_version} already at latest version") + else: print(colored("bold", f"Updating Conda package {nextstrain_base.name} from {current_version} to {latest_version}…")) - update_spec = f"{nextstrain_base.name} =={latest_version}" + # Anything to do? 
+ if latest_version == current_version: + return True - else: - raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") + update_spec = f"{latest_dist.name} =={latest_version}" print() print(f"Updating Conda packages in {PREFIX}…") - print(f" - {update_spec}") + print(f" - {update_spec} ({latest_dist.subdir})") try: - micromamba("update", update_spec) + micromamba("update", update_spec, "--platform", latest_dist.subdir) except InternalError as err: warn(err) traceback.print_exc() @@ -600,7 +605,7 @@ def package_meta(spec: str) -> Optional[dict]: return json.loads(metafile.read_bytes()) -def package_distribution(channel: str, spec: str) -> Optional['PackageDistribution']: +def package_distribution(channel: str, spec: str, subdir: str) -> Optional['PackageDistribution']: with TemporaryFile() as tmp: micromamba( "repoquery", "search", spec, @@ -609,6 +614,7 @@ def package_distribution(channel: str, spec: str) -> Optional['PackageDistributi "--override-channels", "--strict-channel-priority", "--channel", urljoin(CHANNEL_ALIAS, channel), + "--platform", subdir, # Always check that we have latest package index "--repodata-ttl", 0, @@ -646,12 +652,13 @@ def package_distribution(channel: str, spec: str) -> Optional['PackageDistributi if not dist: return None - return PackageDistribution(dist["name"], dist["version"]) + return PackageDistribution(dist["name"], dist["version"], dist["subdir"]) class PackageDistribution(NamedTuple): name: str version: str + subdir: str def package_name(spec: str) -> str: From b27cb78af91cf3dce25a0a632f15ea54f27111d7 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 12 May 2025 13:55:24 -0700 Subject: [PATCH 07/10] runner.conda: Start using osx-arm64 on supported platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When supported, new setups will get osx-arm64 instead of osx-64 and existing setups will be upgraded from osx-64 → osx-arm64. 
If the version of nextstrain-base that's being installed/upgraded (or more likely, downgraded) to does not have an osx-arm64 build, then an osx-64 build will be used instead (provided Rosetta 2 is enabled). Co-authored-by: Victor Lin <13424970+victorlin@users.noreply.github.com> Related-to: Related-to: --- CHANGES.md | 5 +++ nextstrain/cli/runner/conda.py | 63 +++++++++++++++++++++++++--------- nextstrain/cli/util.py | 13 ++++++- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6c8850b7..e509319a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,6 +23,11 @@ development source code and as such may not be routinely kept up to date. setting][]. ([#436](https://github.com/nextstrain/cli/pull/436)) +* The Conda runtime no longer requires Rosetta 2 for macOS running on aarch64 + (aka arm64, Apple Silicon, M1/M2/…) hardware. This improves performance when + using the runtime. Run `nextstrain update conda` to receive the update. + ([#436](https://github.com/nextstrain/cli/pull/436)) + [`NEXTSTRAIN_CONDA_CHANNEL_ALIAS`]: https://docs.nextstrain.org/projects/cli/en/__NEXT__/runtimes/conda/#envvar-NEXTSTRAIN_CONDA_CHANNEL_ALIAS [`channel_alias` Conda config setting]: https://docs.conda.io/projects/conda/en/latest/user-guide/configuration/settings.html#set-ch-alias diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 524f89de..0af48a4a 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -101,14 +101,14 @@ import traceback from pathlib import Path, PurePosixPath from tempfile import TemporaryFile -from typing import IO, Iterable, NamedTuple, Optional, cast +from typing import IO, Iterable, List, NamedTuple, Optional, cast from urllib.parse import urljoin from .. import config from .. 
import requests from ..errors import InternalError, UserError from ..paths import RUNTIMES from ..types import Env, RunnerModule, SetupStatus, SetupTestResults, UpdateStatus -from ..util import capture_output, colored, exec_or_return, parse_version_lax, runner_name, setup_tests_ok, test_rosetta_enabled, warn +from ..util import capture_output, colored, exec_or_return, parse_version_lax, runner_name, setup_tests_ok, test_rosetta_enabled, uniq, warn RUNTIME_ROOT = RUNTIMES / "conda/" @@ -195,10 +195,14 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None def setup(dry_run: bool = False, force: bool = False) -> SetupStatus: + return _setup(dry_run, force) + + +def _setup(dry_run: bool = False, force: bool = False, install_dist: 'PackageDistribution' = None) -> SetupStatus: if not setup_micromamba(dry_run, force): return False - if not setup_prefix(dry_run, force): + if not setup_prefix(dry_run, force, install_dist): return False return True @@ -257,7 +261,7 @@ def setup_micromamba(dry_run: bool = False, force: bool = False) -> bool: return True -def setup_prefix(dry_run: bool = False, force: bool = False) -> bool: +def setup_prefix(dry_run: bool = False, force: bool = False, install_dist: 'PackageDistribution' = None) -> bool: """ Install Conda packages with Micromamba into our ``PREFIX``. 
""" @@ -271,10 +275,14 @@ def setup_prefix(dry_run: bool = False, force: bool = False) -> bool: if not dry_run: shutil.rmtree(str(PREFIX)) - if install_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, platform_subdir()): - install_spec = f"{install_dist.name} =={install_dist.version}" - else: - raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") + if not install_dist: + for subdir in [platform_subdir(), *alternate_platform_subdirs()]: + if install_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, subdir): + break + else: + raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") + + install_spec = f"{install_dist.name} =={install_dist.version}" # Create environment print(f"Installing Conda packages into {PREFIX}…") @@ -471,9 +479,6 @@ def supported_os() -> bool: if system == "Linux": return machine == "x86_64" - # Note even on arm64 (e.g. aarch64, Apple Silicon M1) we use x86_64 - # binaries because of current ecosystem compatibility, but Rosetta will - # make it work. elif system == "Darwin": return machine in {"x86_64", "arm64"} @@ -485,8 +490,6 @@ def supported_os() -> bool: yield ('operating system is supported', supported_os()) - yield from test_rosetta_enabled() - yield ("runtime data dir doesn't have spaces", " " not in str(RUNTIME_ROOT)) @@ -524,18 +527,29 @@ def update() -> UpdateStatus: assert current_meta.get("name") in {nextstrain_base.name, None} - if latest_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, current_subdir): - assert latest_dist.name == nextstrain_base.name + # Prefer the platform subdir if possible (e.g. to migrate from osx-64 → + # osx-arm64). Otherwise, use the prefix's current subdir or alternate + # platform subdirs (e.g. to allow "downgrade" from osx-arm64 → osx-64). 
+ for subdir in uniq([platform_subdir(), current_subdir, *alternate_platform_subdirs()]): + if latest_dist := package_distribution(NEXTSTRAIN_CHANNEL, NEXTSTRAIN_BASE, subdir): + assert latest_dist.name == nextstrain_base.name + break else: raise UserError(f"Unable to find latest version of {NEXTSTRAIN_BASE} package in {NEXTSTRAIN_CHANNEL}") latest_version = latest_dist.version + latest_subdir = latest_dist.subdir if latest_version == current_version: print(f"Conda package {nextstrain_base.name} {current_version} already at latest version") else: print(colored("bold", f"Updating Conda package {nextstrain_base.name} from {current_version} to {latest_version}…")) + # Do we need to force a new setup? + if current_subdir != latest_subdir: + print(f"Updating platform from {current_subdir} → {latest_subdir} by setting up from scratch again…") + return _setup(install_dist = latest_dist, dry_run = False, force = True) + # Anything to do? if latest_version == current_version: return True @@ -703,10 +717,25 @@ def platform_subdir() -> str: if (system, machine) == ("Linux", "x86_64"): subdir = "linux-64" - elif (system, machine) in {("Darwin", "x86_64"), ("Darwin", "arm64")}: - # Use the x86 arch even on arm (https://docs.nextstrain.org/en/latest/reference/faq.html#why-intel-miniconda-installer-on-apple-silicon) + elif (system, machine) == ("Darwin", "x86_64"): subdir = "osx-64" + elif (system, machine) == ("Darwin", "arm64"): + subdir = "osx-arm64" else: raise InternalError(f"Unsupported system/machine: {system}/{machine}") return subdir + + +def alternate_platform_subdirs() -> List[str]: + """ + Alternative Conda subdirs that this :mod:`platform` can use. 
+ """ + system = platform.system() + machine = platform.machine() + + if (system, machine) == ("Darwin", "arm64"): + if setup_tests_ok(test_rosetta_enabled()): + return ["osx-64"] + + return [] diff --git a/nextstrain/cli/util.py b/nextstrain/cli/util.py index 2c0ffe94..77772dbf 100644 --- a/nextstrain/cli/util.py +++ b/nextstrain/cli/util.py @@ -6,7 +6,7 @@ import sys from functools import partial from importlib.metadata import distribution as distribution_info, PackageNotFoundError -from typing import Any, Callable, Iterable, Literal, Mapping, List, Optional, Sequence, Tuple, Union, overload +from typing import Any, Callable, Iterable, Literal, Mapping, List, Optional, Sequence, Tuple, TypeVar, Union, overload from packaging.version import Version, InvalidVersion, parse as parse_version_strict from pathlib import Path, PurePath from shlex import quote as shquote @@ -808,3 +808,14 @@ def __init__(self, version: str, *, compliant: bool, original: str = None): super().__init__(version) self.compliant = compliant self.original = original if original is not None else version + + +T = TypeVar("T") + +def uniq(xs: Iterable[T]) -> Iterable[T]: + """ + Filter an iterable *xs* to its unique elements, preserving order. + + Elements must be hashable. + """ + return dict.fromkeys(xs).keys() From b16f602f45e941b24db2b12588e58920b167a60d Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 15 May 2025 11:29:45 -0700 Subject: [PATCH 08/10] docs: Fix indentation of Sphinx linkcheck config Whitespace-only change. --- doc/conf.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 438d0ba9..7035d8cd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -81,16 +81,16 @@ ## string" matching happening, and something like a plain `r'google'` ## regular expression will _NOT_ match all google.com URLs. 
linkcheck_ignore = [ - # we have links to localhost for explanatory purposes; obviously - # they will never work in the linkchecker - r'^http://127\.0\.0\.1:\d+', - r'^http://localhost:\d+', + # we have links to localhost for explanatory purposes; obviously + # they will never work in the linkchecker + r'^http://127\.0\.0\.1:\d+', + r'^http://localhost:\d+', ] linkcheck_anchors_ignore_for_url = [ - # Github uses anchor-looking links for highlighting lines but - # handles the actual resolution with Javascript, so skip anchor - # checks for Github URLs: - r'^https://github\.com', - r'^https://console\.aws\.amazon\.com/batch/home', - r'^https://console\.aws\.amazon\.com/ec2/v2/home', + # Github uses anchor-looking links for highlighting lines but + # handles the actual resolution with Javascript, so skip anchor + # checks for Github URLs: + r'^https://github\.com', + r'^https://console\.aws\.amazon\.com/batch/home', + r'^https://console\.aws\.amazon\.com/ec2/v2/home', ] From 6c8342553c7b0faa5d2bb935b645dc8be7872720 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 15 May 2025 11:30:52 -0700 Subject: [PATCH 09/10] docs: Switch Sphinx linkcheck config to fixed-string prefixes Nicer than escaped patterns when we don't need the power of a pattern. --- doc/conf.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7035d8cd..b8e29358 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -14,6 +14,8 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) +import re + # -- Project information ----------------------------------------------------- @@ -81,16 +83,22 @@ ## string" matching happening, and something like a plain `r'google'` ## regular expression will _NOT_ match all google.com URLs. 
linkcheck_ignore = [ - # we have links to localhost for explanatory purposes; obviously - # they will never work in the linkchecker - r'^http://127\.0\.0\.1:\d+', - r'^http://localhost:\d+', + # Fixed-string prefixes + *map(re.escape, [ + # we have links to localhost for explanatory purposes; obviously + # they will never work in the linkchecker + 'http://127.0.0.1:', + 'http://localhost:', + ]), ] linkcheck_anchors_ignore_for_url = [ - # Github uses anchor-looking links for highlighting lines but - # handles the actual resolution with Javascript, so skip anchor - # checks for Github URLs: - r'^https://github\.com', - r'^https://console\.aws\.amazon\.com/batch/home', - r'^https://console\.aws\.amazon\.com/ec2/v2/home', + # Fixed-string prefixes + *map(re.escape, [ + # Github uses anchor-looking links for highlighting lines but + # handles the actual resolution with Javascript, so skip anchor + # checks for Github URLs: + 'https://github.com', + 'https://console.aws.amazon.com/batch/home', + 'https://console.aws.amazon.com/ec2/v2/home', + ]), ] From 9cd6964671e774f2b4d6e7118af4c00b5afd21fe Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 15 May 2025 11:32:02 -0700 Subject: [PATCH 10/10] docs: Ignore https://conda.anaconda.org in Sphinx linkcheck --- doc/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index b8e29358..21816c7c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -89,6 +89,9 @@ # they will never work in the linkchecker 'http://127.0.0.1:', 'http://localhost:', + + # Cloudflare "protection" gets in the way with a 403 + 'https://conda.anaconda.org', ]), ] linkcheck_anchors_ignore_for_url = [