Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/docs/guides/server-deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ When using [files](../concepts/dev-environments.md#files) or [repos](../concept

### S3

To use S3 for storing uploaded files, set the `DSTACK_SERVER_S3_BUCKET` and `DSTACK_SERVER_BUCKET_REGION` environment variables.
To use S3 for storing uploaded files, set the `DSTACK_SERVER_S3_BUCKET` and `DSTACK_SERVER_S3_BUCKET_REGION` environment variables.
The bucket must be created beforehand. `dstack` won't try to create it.

??? info "Required permissions"
Expand Down
1 change: 0 additions & 1 deletion src/dstack/_internal/cli/commands/offer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def _command(self, args: argparse.Namespace):
run_spec = RunSpec(
configuration=conf,
profile=profile,
ssh_key_pub="(dummy)", # TODO: Remove since 0.19.40
)

if args.group_by:
Expand Down
19 changes: 1 addition & 18 deletions src/dstack/_internal/cli/services/configurators/fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@
from dstack._internal.core.errors import (
CLIError,
ConfigurationError,
MethodNotAllowedError,
ResourceNotExistsError,
ServerClientError,
URLNotFoundError,
)
from dstack._internal.core.models.common import ApplyAction
from dstack._internal.core.models.configurations import ApplyConfigurationType
Expand All @@ -39,7 +37,6 @@
from dstack._internal.utils.common import local_time
from dstack._internal.utils.logging import get_logger
from dstack._internal.utils.ssh import convert_ssh_key_to_pem, generate_public_key, pkey_from_str
from dstack.api._public import Client
from dstack.api.utils import load_profile

logger = get_logger(__name__)
Expand Down Expand Up @@ -233,7 +230,7 @@ def _apply_plan_on_old_server(self, plan: FleetPlan, command_args: argparse.Name

try:
with console.status("Applying plan..."):
fleet = _apply_plan(self.api, plan)
fleet = self.api.client.fleets.apply_plan(project_name=self.api.project, plan=plan)
except ServerClientError as e:
raise CLIError(e.msg)
if command_args.detach:
Expand Down Expand Up @@ -481,17 +478,3 @@ def _fleet_retrying(fleet: Fleet) -> bool:
return False
active_instances = [i for i in fleet.instances if i.status.is_active()]
return len(active_instances) < fleet.spec.configuration.nodes.min


def _apply_plan(api: Client, plan: FleetPlan) -> Fleet:
    """Apply a fleet plan, falling back to plain fleet creation.

    Calls ``api.client.fleets.apply_plan`` for the project ``api.project``.
    If that call raises ``URLNotFoundError`` or ``MethodNotAllowedError``,
    the fleet is created from ``plan.spec`` via ``api.client.fleets.create``
    instead.

    Args:
        api: Client whose ``client.fleets`` API and ``project`` are used.
        plan: Fleet plan to apply; the fallback only uses ``plan.spec``.

    Returns:
        The value returned by whichever of the two calls was made last.
    """
    try:
        return api.client.fleets.apply_plan(
            project_name=api.project,
            plan=plan,
        )
    except (URLNotFoundError, MethodNotAllowedError):
        # NOTE(review): presumably the apply_plan endpoint is unavailable on
        # older servers — confirm before removing.
        # TODO: Remove in 0.20
        return api.client.fleets.create(
            project_name=api.project,
            spec=plan.spec,
        )
5 changes: 0 additions & 5 deletions src/dstack/_internal/core/backends/azure/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,6 @@ def get_image_name(self) -> str:
r"NC(\d+)ads_A100_v4", # NC A100 v4-series [A100 80GB]
r"ND(\d+)asr_v4", # ND A100 v4-series [8xA100 40GB]
r"ND(\d+)amsr_A100_v4", # NDm A100 v4-series [8xA100 80GB]
# Deprecated series
# TODO: Remove after several releases
r"D(\d+)s_v3", # Dsv3-series (general purpose)
r"E(\d+)i?s_v4", # Esv4-series (memory optimized)
r"E(\d+)-(\d+)s_v4", # Esv4-series (constrained vCPU)
]
_SUPPORTED_VM_SERIES_PATTERN = (
"^Standard_(" + "|".join(f"({s})" for s in _SUPPORTED_VM_SERIES_PATTERNS) + ")$"
Expand Down
2 changes: 0 additions & 2 deletions src/dstack/_internal/core/backends/kubernetes/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
from dstack._internal.core.errors import ComputeError
from dstack._internal.core.models.backends.base import BackendType

# TODO: update import as KNOWN_GPUS becomes public
from dstack._internal.core.models.gateways import (
GatewayComputeConfiguration,
GatewayProvisioningData,
Expand Down
38 changes: 8 additions & 30 deletions src/dstack/_internal/core/compatibility/fleets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from dstack._internal.core.models.common import IncludeExcludeDictType, IncludeExcludeSetType
from dstack._internal.core.models.fleets import ApplyFleetPlanInput, FleetSpec
from dstack._internal.core.models.instances import Instance


def get_get_plan_excludes(fleet_spec: FleetSpec) -> IncludeExcludeDictType:
Expand All @@ -22,10 +21,6 @@ def get_apply_plan_excludes(plan_input: ApplyFleetPlanInput) -> IncludeExcludeDi
if current_resource is not None:
current_resource_excludes = {}
apply_plan_excludes["current_resource"] = current_resource_excludes
if all(map(_should_exclude_instance_cpu_arch, current_resource.instances)):
current_resource_excludes["instances"] = {
"__all__": {"instance_type": {"resources": {"cpu_arch"}}}
}
return {"plan": apply_plan_excludes}


Expand All @@ -46,35 +41,18 @@ def get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[IncludeExcludeDic
spec_excludes: IncludeExcludeDictType = {}
configuration_excludes: IncludeExcludeDictType = {}
profile_excludes: IncludeExcludeSetType = set()
profile = fleet_spec.profile
if profile.fleets is None:
profile_excludes.add("fleets")
if fleet_spec.configuration.tags is None:
configuration_excludes["tags"] = True
if profile.tags is None:
profile_excludes.add("tags")
if profile.startup_order is None:
profile_excludes.add("startup_order")
if profile.stop_criteria is None:
profile_excludes.add("stop_criteria")
if profile.schedule is None:
profile_excludes.add("schedule")
if (
fleet_spec.configuration.nodes
and fleet_spec.configuration.nodes.min == fleet_spec.configuration.nodes.target
):
configuration_excludes["nodes"] = {"target"}

# Add excludes like this:
#
# if fleet_spec.configuration.tags is None:
# configuration_excludes["tags"] = True
# if fleet_spec.profile.tags is None:
# profile_excludes.add("tags")

if configuration_excludes:
spec_excludes["configuration"] = configuration_excludes
if profile_excludes:
spec_excludes["profile"] = profile_excludes
if spec_excludes:
return spec_excludes
return None


def _should_exclude_instance_cpu_arch(instance: Instance) -> bool:
    """Tell whether ``cpu_arch`` is unset for the given instance.

    Args:
        instance: Object expected to expose
            ``instance_type.resources.cpu_arch``.

    Returns:
        ``True`` when ``instance.instance_type.resources.cpu_arch`` is
        ``None`` or when reading that attribute chain raises
        ``AttributeError``; ``False`` otherwise.
    """
    try:
        return instance.instance_type.resources.cpu_arch is None
    except AttributeError:
        # Some link in the attribute chain is missing (e.g. it is ``None``).
        return True
10 changes: 6 additions & 4 deletions src/dstack/_internal/core/compatibility/gateways.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ def _get_gateway_configuration_excludes(
configuration: GatewayConfiguration,
) -> IncludeExcludeDictType:
configuration_excludes: IncludeExcludeDictType = {}
if configuration.tags is None:
configuration_excludes["tags"] = True
if configuration.router is None:
configuration_excludes["router"] = True

# Add excludes like this:
#
# if configuration.tags is None:
# configuration_excludes["tags"] = True

return configuration_excludes
2 changes: 0 additions & 2 deletions src/dstack/_internal/core/compatibility/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,4 @@ def get_poll_logs_excludes(request: PollLogsRequest) -> Optional[IncludeExcludeD
clients backward-compatibility with older servers.
"""
excludes: IncludeExcludeDictType = {}
if request.next_token is None:
excludes["next_token"] = True
return excludes if excludes else None
169 changes: 8 additions & 161 deletions src/dstack/_internal/core/compatibility/runs.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
from typing import Optional

from dstack._internal.core.models.common import IncludeExcludeDictType, IncludeExcludeSetType
from dstack._internal.core.models.configurations import LEGACY_REPO_DIR, ServiceConfiguration
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSpec, JobSubmission, RunSpec
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSpec, RunSpec
from dstack._internal.server.schemas.runs import GetRunPlanRequest, ListRunsRequest
from dstack._internal.settings import FeatureFlags


def get_list_runs_excludes(list_runs_request: ListRunsRequest) -> IncludeExcludeSetType:
excludes = set()
if list_runs_request.include_jobs:
excludes.add("include_jobs")
if list_runs_request.job_submissions_limit is None:
excludes.add("job_submissions_limit")
excludes: IncludeExcludeSetType = set()
return excludes


Expand All @@ -29,82 +23,7 @@ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[IncludeExcludeD
current_resource = plan.current_resource
if current_resource is not None:
current_resource_excludes: IncludeExcludeDictType = {}
current_resource_excludes["status_message"] = True
if current_resource.deployment_num == 0:
current_resource_excludes["deployment_num"] = True
if current_resource.fleet is None:
current_resource_excludes["fleet"] = True
if current_resource.next_triggered_at is None:
current_resource_excludes["next_triggered_at"] = True
apply_plan_excludes["current_resource"] = current_resource_excludes
current_resource_excludes["run_spec"] = get_run_spec_excludes(current_resource.run_spec)
job_submissions_excludes: IncludeExcludeDictType = {}
current_resource_excludes["jobs"] = {
"__all__": {
"job_spec": get_job_spec_excludes([job.job_spec for job in current_resource.jobs]),
"job_submissions": {"__all__": job_submissions_excludes},
}
}
job_submissions = [js for j in current_resource.jobs for js in j.job_submissions]
if all(map(_should_exclude_job_submission_jpd_cpu_arch, job_submissions)):
job_submissions_excludes["job_provisioning_data"] = {
"instance_type": {"resources": {"cpu_arch"}}
}
jrd_offer_excludes = {}
if any(
js.job_runtime_data and js.job_runtime_data.offer for js in job_submissions
) and all(
not js.job_runtime_data
or not js.job_runtime_data.offer
or not js.job_runtime_data.offer.backend_data
for js in job_submissions
):
jrd_offer_excludes["backend_data"] = True
if all(map(_should_exclude_job_submission_jrd_cpu_arch, job_submissions)):
jrd_offer_excludes["instance"] = {"resources": {"cpu_arch"}}
if jrd_offer_excludes:
job_submissions_excludes["job_runtime_data"] = {"offer": jrd_offer_excludes}
if all(js.exit_status is None for js in job_submissions):
job_submissions_excludes["exit_status"] = True
if all(js.status_message == "" for js in job_submissions):
job_submissions_excludes["status_message"] = True
if all(js.error is None for js in job_submissions):
job_submissions_excludes["error"] = True
if all(js.deployment_num == 0 for js in job_submissions):
job_submissions_excludes["deployment_num"] = True
if all(not js.probes for js in job_submissions):
job_submissions_excludes["probes"] = True
latest_job_submission = current_resource.latest_job_submission
if latest_job_submission is not None:
latest_job_submission_excludes: IncludeExcludeDictType = {}
current_resource_excludes["latest_job_submission"] = latest_job_submission_excludes
if _should_exclude_job_submission_jpd_cpu_arch(latest_job_submission):
latest_job_submission_excludes["job_provisioning_data"] = {
"instance_type": {"resources": {"cpu_arch"}}
}
latest_job_submission_jrd_offer_excludes = {}
if (
latest_job_submission.job_runtime_data
and latest_job_submission.job_runtime_data.offer
and not latest_job_submission.job_runtime_data.offer.backend_data
):
latest_job_submission_jrd_offer_excludes["backend_data"] = True
if _should_exclude_job_submission_jrd_cpu_arch(latest_job_submission):
latest_job_submission_jrd_offer_excludes["instance"] = {"resources": {"cpu_arch"}}
if latest_job_submission_jrd_offer_excludes:
latest_job_submission_excludes["job_runtime_data"] = {
"offer": latest_job_submission_jrd_offer_excludes
}
if latest_job_submission.exit_status is None:
latest_job_submission_excludes["exit_status"] = True
if latest_job_submission.status_message == "":
latest_job_submission_excludes["status_message"] = True
if latest_job_submission.error is None:
latest_job_submission_excludes["error"] = True
if latest_job_submission.deployment_num == 0:
latest_job_submission_excludes["deployment_num"] = True
if not latest_job_submission.probes:
latest_job_submission_excludes["probes"] = True
return {"plan": apply_plan_excludes}


Expand All @@ -117,8 +36,6 @@ def get_get_plan_excludes(request: GetRunPlanRequest) -> Optional[IncludeExclude
run_spec_excludes = get_run_spec_excludes(request.run_spec)
if run_spec_excludes is not None:
get_plan_excludes["run_spec"] = run_spec_excludes
if request.max_offers is None:
get_plan_excludes["max_offers"] = True
return get_plan_excludes


Expand All @@ -131,53 +48,13 @@ def get_run_spec_excludes(run_spec: RunSpec) -> IncludeExcludeDictType:
spec_excludes: IncludeExcludeDictType = {}
configuration_excludes: IncludeExcludeDictType = {}
profile_excludes: IncludeExcludeSetType = set()
configuration = run_spec.configuration
profile = run_spec.profile

if not FeatureFlags.LEGACY_REPO_DIR_DISABLED:
if run_spec.repo_dir in [None, LEGACY_REPO_DIR]:
spec_excludes["repo_dir"] = True
elif run_spec.repo_dir == "." and configuration.working_dir in [
None,
LEGACY_REPO_DIR,
".",
]:
spec_excludes["repo_dir"] = True

if configuration.fleets is None:
configuration_excludes["fleets"] = True
if profile is not None and profile.fleets is None:
profile_excludes.add("fleets")
if configuration.tags is None:
configuration_excludes["tags"] = True
if profile is not None and profile.tags is None:
profile_excludes.add("tags")
if isinstance(configuration, ServiceConfiguration) and not configuration.rate_limits:
configuration_excludes["rate_limits"] = True
if configuration.shell is None:
configuration_excludes["shell"] = True
if configuration.docker is None:
configuration_excludes["docker"] = True
if configuration.priority is None:
configuration_excludes["priority"] = True
if configuration.startup_order is None:
configuration_excludes["startup_order"] = True
if profile is not None and profile.startup_order is None:
profile_excludes.add("startup_order")
if configuration.stop_criteria is None:
configuration_excludes["stop_criteria"] = True
if isinstance(configuration, ServiceConfiguration) and not configuration.probes:
configuration_excludes["probes"] = True
if profile is not None and profile.stop_criteria is None:
profile_excludes.add("stop_criteria")
if not configuration.files:
configuration_excludes["files"] = True
if not run_spec.file_archives:
spec_excludes["file_archives"] = True
if configuration.schedule is None:
configuration_excludes["schedule"] = True
if profile is not None and profile.schedule is None:
profile_excludes.add("schedule")
# Add excludes like this:
#
# if run_spec.configuration.tags is None:
# configuration_excludes["tags"] = True
# if run_spec.profile is not None and run_spec.profile.tags is None:
# profile_excludes.add("tags")

if configuration_excludes:
spec_excludes["configuration"] = configuration_excludes
Expand All @@ -193,34 +70,4 @@ def get_job_spec_excludes(job_specs: list[JobSpec]) -> IncludeExcludeDictType:
clients backward-compatibility with older servers.
"""
spec_excludes: IncludeExcludeDictType = {}

if all(s.repo_code_hash is None for s in job_specs):
spec_excludes["repo_code_hash"] = True
if all(s.repo_data is None for s in job_specs):
spec_excludes["repo_data"] = True
if all(not s.file_archives for s in job_specs):
spec_excludes["file_archives"] = True
if all(s.service_port is None for s in job_specs):
spec_excludes["service_port"] = True
if all(not s.probes for s in job_specs):
spec_excludes["probes"] = True
if all(s.repo_dir in [None, LEGACY_REPO_DIR] for s in job_specs):
spec_excludes["repo_dir"] = True
if all(s.requirements.multinode is None for s in job_specs):
spec_excludes["requirements"] = {"multinode": True}

return spec_excludes


def _should_exclude_job_submission_jpd_cpu_arch(job_submission: JobSubmission) -> bool:
    """Tell whether ``cpu_arch`` is unset in the job provisioning data.

    Args:
        job_submission: Object expected to expose
            ``job_provisioning_data.instance_type.resources.cpu_arch``.

    Returns:
        ``True`` when that ``cpu_arch`` value is ``None`` or when reading
        the attribute chain raises ``AttributeError``; ``False`` otherwise.
    """
    try:
        return job_submission.job_provisioning_data.instance_type.resources.cpu_arch is None
    except AttributeError:
        # Some link in the attribute chain is missing (e.g. it is ``None``).
        return True


def _should_exclude_job_submission_jrd_cpu_arch(job_submission: JobSubmission) -> bool:
    """Tell whether ``cpu_arch`` is unset in the job runtime data offer.

    Args:
        job_submission: Object expected to expose
            ``job_runtime_data.offer.instance.resources.cpu_arch``.

    Returns:
        ``True`` when that ``cpu_arch`` value is ``None`` or when reading
        the attribute chain raises ``AttributeError``; ``False`` otherwise.
    """
    try:
        return job_submission.job_runtime_data.offer.instance.resources.cpu_arch is None
    except AttributeError:
        # Some link in the attribute chain is missing (e.g. it is ``None``).
        return True
10 changes: 6 additions & 4 deletions src/dstack/_internal/core/compatibility/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ def _get_volume_configuration_excludes(
configuration: VolumeConfiguration,
) -> IncludeExcludeDictType:
configuration_excludes: IncludeExcludeDictType = {}
if configuration.tags is None:
configuration_excludes["tags"] = True
if configuration.auto_cleanup_duration is None:
configuration_excludes["auto_cleanup_duration"] = True

# Add excludes like this:
#
# if configuration.tags is None:
# configuration_excludes["tags"] = True

return configuration_excludes
Loading