From d2900a4757d1c69ebf533b625b9fe6057b12a5e7 Mon Sep 17 00:00:00 2001 From: Jvst Me Date: Tue, 16 Dec 2025 01:16:52 +0100 Subject: [PATCH] Allow customizing gateway instance type Add an optional `instance_type` gateway configuration property that allows overriding the default (typically very small) gateway instance type. ```yaml type: gateway name: example-gateway backend: aws region: eu-west-1 instance_type: t3.large domain: example.com ``` Supported backends: - `aws` - `gcp` Not supported: - `kubernetes` (instance types are not differentiated) - `azure` (gateway support currently broken) The commit also includes improved gateway provisioning error handling in case the user specifies an invalid instance type. --- docs/docs/concepts/gateways.md | 21 +++++++++++ .../_internal/core/backends/aws/compute.py | 36 +++++++++++-------- .../_internal/core/backends/azure/compute.py | 10 +++++- .../_internal/core/backends/gcp/compute.py | 13 +++++-- .../core/backends/kubernetes/compute.py | 5 +++ src/dstack/_internal/core/models/gateways.py | 11 ++++++ .../server/services/gateways/__init__.py | 1 + .../_internal/server/routers/test_gateways.py | 7 ++++ 8 files changed, 85 insertions(+), 19 deletions(-) diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index 728077addb..7402bc5762 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -56,6 +56,27 @@ You can create gateways with the `aws`, `azure`, `gcp`, or `kubernetes` backends Gateways in `kubernetes` backend require an external load balancer. Managed Kubernetes solutions usually include a load balancer. For self-hosted Kubernetes, you must provide a load balancer by yourself. +### Instance type + +By default, `dstack` provisions a small, low-cost instance for the gateway. If you expect to run high-traffic services, you can configure a larger instance type using the `instance_type` property. + +
+ +```yaml +type: gateway +name: example-gateway + +backend: aws +region: eu-west-1 + +# (Optional) Override the gateway instance type +instance_type: t3.large + +domain: example.com +``` + +
+ ### Router By default, the gateway uses its own load balancer to route traffic between replicas. However, you can delegate this responsibility to a specific router by setting the `router` property. Currently, the only supported external router is `sglang`. diff --git a/src/dstack/_internal/core/backends/aws/compute.py b/src/dstack/_internal/core/backends/aws/compute.py index 67a7e409dd..48720bb316 100644 --- a/src/dstack/_internal/core/backends/aws/compute.py +++ b/src/dstack/_internal/core/backends/aws/compute.py @@ -76,6 +76,7 @@ logger = get_logger(__name__) # gp2 volumes can be 1GB-16TB, dstack AMIs are 100GB CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("100GB"), max=Memory.parse("16TB")) +DEFAULT_GATEWAY_INSTANCE_TYPE = "t3.micro" class AWSGatewayBackendData(CoreModel): @@ -454,22 +455,27 @@ def create_gateway( project_id=configuration.project_name, vpc_id=vpc_id, ) - response = ec2_resource.create_instances( - **aws_resources.create_instances_struct( - disk_size=10, - image_id=aws_resources.get_gateway_image_id(ec2_client), - instance_type="t3.micro", - iam_instance_profile=None, - user_data=get_gateway_user_data( - configuration.ssh_key_pub, router=configuration.router - ), - tags=tags, - security_group_id=security_group_id, - spot=False, - subnet_id=subnet_id, - allocate_public_ip=configuration.public_ip, - ) + instance_struct = aws_resources.create_instances_struct( + disk_size=10, + image_id=aws_resources.get_gateway_image_id(ec2_client), + instance_type=configuration.instance_type or DEFAULT_GATEWAY_INSTANCE_TYPE, + iam_instance_profile=None, + user_data=get_gateway_user_data( + configuration.ssh_key_pub, router=configuration.router + ), + tags=tags, + security_group_id=security_group_id, + spot=False, + subnet_id=subnet_id, + allocate_public_ip=configuration.public_ip, ) + try: + response = ec2_resource.create_instances(**instance_struct) + except botocore.exceptions.ClientError as e: + msg = f"AWS Error: {e.response['Error']['Code']}" + if e.response["Error"].get("Message"): + msg += f": {e.response['Error']['Message']}" + raise ComputeError(msg) instance = response[0] instance.wait_until_running() instance.reload() # populate instance.public_ip_address diff --git a/src/dstack/_internal/core/backends/azure/compute.py b/src/dstack/_internal/core/backends/azure/compute.py index 0089f5e478..74e585d631 100644 --- a/src/dstack/_internal/core/backends/azure/compute.py +++ b/src/dstack/_internal/core/backends/azure/compute.py @@ -79,6 +79,7 @@ logger = get_logger(__name__) # OS disks can be 1GB-4095GB, dstack images are 30GB CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("30GB"), max=Memory.parse("4095GB")) +DEFAULT_GATEWAY_INSTANCE_TYPE = "Standard_B1ms" class AzureCompute( @@ -230,6 +231,13 @@ def create_gateway( self, configuration: GatewayComputeConfiguration, ) -> GatewayProvisioningData: + if configuration.instance_type is not None: + # TODO: support instance_type. Requires selecting a VM image to avoid errors like this: + # > The selected VM size 'Standard_E4s_v6' cannot boot Hypervisor Generation '1' + raise ComputeError( + "The `azure` backend does not support the `instance_type`" + " gateway configuration property" + ) logger.info( "Launching %s gateway instance in %s...", configuration.instance_name, @@ -275,7 +283,7 @@ def create_gateway( managed_identity_name=None, managed_identity_resource_group=None, image_reference=_get_gateway_image_ref(), - vm_size="Standard_B1ms", + vm_size=DEFAULT_GATEWAY_INSTANCE_TYPE, instance_name=instance_name, user_data=get_gateway_user_data( configuration.ssh_key_pub, router=configuration.router diff --git a/src/dstack/_internal/core/backends/gcp/compute.py b/src/dstack/_internal/core/backends/gcp/compute.py index 8e5c36fccb..76a394bc7a 100644 --- a/src/dstack/_internal/core/backends/gcp/compute.py +++ b/src/dstack/_internal/core/backends/gcp/compute.py @@ -88,6 +88,7 @@ ) RESOURCE_NAME_PATTERN = re.compile(r"[a-z0-9-]+") TPU_VERSIONS = [tpu.name for tpu in KNOWN_TPUS] +DEFAULT_GATEWAY_INSTANCE_TYPE = "e2-medium" class GCPOfferBackendData(CoreModel): @@ -596,7 +597,7 @@ def create_gateway( request.instance_resource = gcp_resources.create_instance_struct( disk_size=10, image_id=_get_gateway_image_id(), - machine_type="e2-medium", + machine_type=configuration.instance_type or DEFAULT_GATEWAY_INSTANCE_TYPE, accelerators=[], spot=False, user_data=get_gateway_user_data( @@ -612,8 +613,14 @@ def create_gateway( subnetwork=subnetwork, allocate_public_ip=configuration.public_ip, ) - operation = self.instances_client.insert(request=request) - gcp_resources.wait_for_extended_operation(operation, "instance creation") + try: + operation = self.instances_client.insert(request=request) + gcp_resources.wait_for_extended_operation(operation, "instance creation") + except ( + google.api_core.exceptions.ServiceUnavailable, + google.api_core.exceptions.ClientError, + ) as e: + raise ComputeError(f"GCP error: {e.message}") instance = self.instances_client.get( project=self.config.project_id, zone=zone, instance=instance_name ) diff --git a/src/dstack/_internal/core/backends/kubernetes/compute.py b/src/dstack/_internal/core/backends/kubernetes/compute.py index da5e125b96..e46b99d9d7 100644 --- a/src/dstack/_internal/core/backends/kubernetes/compute.py +++ b/src/dstack/_internal/core/backends/kubernetes/compute.py @@ -370,6 +370,11 @@ def create_gateway( # TODO: By default EKS creates a Classic Load Balancer for Load Balancer services. # Consider deploying an NLB. It seems it requires some extra configuration on the cluster: # https://docs.aws.amazon.com/eks/latest/userguide/network-load-balancing.html + if configuration.instance_type is not None: + raise ComputeError( + "The `kubernetes` backend does not support the `instance_type`" + " gateway configuration property" + ) instance_name = generate_unique_gateway_instance_name(configuration) commands = _get_gateway_commands( authorized_keys=[configuration.ssh_key_pub], router=configuration.router diff --git a/src/dstack/_internal/core/models/gateways.py b/src/dstack/_internal/core/models/gateways.py index ace68e5429..2dfeb5b181 100644 --- a/src/dstack/_internal/core/models/gateways.py +++ b/src/dstack/_internal/core/models/gateways.py @@ -51,6 +51,16 @@ class GatewayConfiguration(CoreModel): default: Annotated[bool, Field(description="Make the gateway default")] = False backend: Annotated[BackendType, Field(description="The gateway backend")] region: Annotated[str, Field(description="The gateway region")] + instance_type: Annotated[ + Optional[str], + Field( + description=( + "Backend-specific instance type to use for the gateway instance." + " Omit to use the backend's default, which is typically a small non-GPU instance" + ), + min_length=1, + ), + ] = None router: Annotated[ Optional[AnyRouterConfig], Field(description="The router configuration"), @@ -115,6 +125,7 @@ class GatewayComputeConfiguration(CoreModel): instance_name: str backend: BackendType region: str + instance_type: Optional[str] = None public_ip: bool ssh_key_pub: str certificate: Optional[AnyGatewayCertificate] = None diff --git a/src/dstack/_internal/server/services/gateways/__init__.py b/src/dstack/_internal/server/services/gateways/__init__.py index 273b1fb894..682feaf31b 100644 --- a/src/dstack/_internal/server/services/gateways/__init__.py +++ b/src/dstack/_internal/server/services/gateways/__init__.py @@ -104,6 +104,7 @@ async def create_gateway_compute( instance_name=configuration.name, backend=configuration.backend, region=configuration.region, + instance_type=configuration.instance_type, public_ip=configuration.public_ip, ssh_key_pub=gateway_ssh_public_key, certificate=configuration.certificate, diff --git a/src/tests/_internal/server/routers/test_gateways.py b/src/tests/_internal/server/routers/test_gateways.py index 0bee1e6f06..b909c7d729 100644 --- a/src/tests/_internal/server/routers/test_gateways.py +++ b/src/tests/_internal/server/routers/test_gateways.py @@ -70,6 +70,7 @@ async def test_list(self, test_db, session: AsyncSession, client: AsyncClient): "name": gateway.name, "backend": backend.type.value, "region": gateway.region, + "instance_type": None, "router": None, "domain": gateway.wildcard_domain, "default": False, @@ -122,6 +123,7 @@ async def test_get(self, test_db, session: AsyncSession, client: AsyncClient): "name": gateway.name, "backend": backend.type.value, "region": gateway.region, + "instance_type": None, "router": None, "domain": gateway.wildcard_domain, "default": False, @@ -203,6 +205,7 @@ async def test_create_gateway(self, test_db, session: AsyncSession, client: Asyn "name": "test", "backend": backend.type.value, "region": "us", + "instance_type": None, "router": None, "domain": None, "default": True, @@ -256,6 +259,7 @@ async def test_create_gateway_without_name( "name": "random-name", "backend": backend.type.value, "region": "us", + "instance_type": None, "router": None, "domain": None, "default": True, @@ -359,6 +363,7 @@ async def test_set_default_gateway(self, test_db, session: AsyncSession, client: "name": gateway.name, "backend": backend.type.value, "region": gateway.region, + "instance_type": None, "router": None, "domain": gateway.wildcard_domain, "default": True, @@ -482,6 +487,7 @@ def get_backend(project, backend_type): "name": gateway_gcp.name, "backend": backend_gcp.type.value, "region": gateway_gcp.region, + "instance_type": None, "router": None, "domain": gateway_gcp.wildcard_domain, "default": False, @@ -552,6 +558,7 @@ async def test_set_wildcard_domain(self, test_db, session: AsyncSession, client: "name": gateway.name, "backend": backend.type.value, "region": gateway.region, + "instance_type": None, "router": None, "domain": "test.com", "default": False,