From 9744c0f40e30f6b6a94ba0d5128a3bc70e9585f3 Mon Sep 17 00:00:00 2001 From: Brian Lechthaler Date: Wed, 4 Feb 2026 16:02:14 -0800 Subject: [PATCH 01/17] Add Support for San Francisco Compute (#78) * add scaffolding, and client authentication. * return APITypeGlobal from GetAPIType function, as SFC accounts are not tied to specific regions. * fix apiKey in SFCCredential struct * scaffolding for instance.go * add instance creation implementation with SSH key support * add function to map the status of a node reported from SFC API to v1.LifecycleStatus in Brev * implement GetInstance in sfcompute with node data retrieval including SSH Hostname / Public IP * add TerminateInstance implementation with node release and delete logic * set default SSH port to 2222, as is standard for our platform * implement GetSSHHostname for retrieving the SSH hostname of an instance in sfcompute * remove unneeded call to api for GetSSHHostname * use VM ID instead of instance ID to retrieve SSH Hostname * remove get ssh hostname function * implement ListInstances * add validation test for sfcompute with API key check and skip logic * add getInstanceTypeID method for generating instance type IDs in sfcompute * bump sfcnodes version to v0.1.0-alpha.4 which adds support for the /v0/zones endpoint * implement GetLocations * only return approved zones * only return regions that have more than zero capacity instead of any zones that have capacity not equal to zero, in case the availability ever returns a negative number * update location description to include formatted hardware type information. 
example: `sfc_hayesvalley_h100` * return unavailable regions with v1.Location{Available: false} * fix an error where a nil map was returned * start implementing GetInstanceTypes * fix tests failing due to ValidateRegionalInstanceTypes and ValidateStableInstanceTypeIDs fails errors * set MaxPricePerNodePerHour to 1600 ($16/node/h, $2/gpu/h) * add regions excelsior and yerba * remove region excelsior --- go.mod | 5 + go.sum | 13 ++ v1/providers/sfcompute/capabilities.go | 24 ++++ v1/providers/sfcompute/client.go | 73 ++++++++++ v1/providers/sfcompute/instance.go | 159 ++++++++++++++++++++++ v1/providers/sfcompute/instancetype.go | 101 ++++++++++++++ v1/providers/sfcompute/validation_test.go | 38 ++++++ 7 files changed, 413 insertions(+) create mode 100644 v1/providers/sfcompute/capabilities.go create mode 100644 v1/providers/sfcompute/client.go create mode 100644 v1/providers/sfcompute/instance.go create mode 100644 v1/providers/sfcompute/instancetype.go create mode 100644 v1/providers/sfcompute/validation_test.go diff --git a/go.mod b/go.mod index bcf4b5e..a695f95 100644 --- a/go.mod +++ b/go.mod @@ -21,6 +21,7 @@ require ( github.com/jarcoal/httpmock v1.4.0 github.com/nebius/gosdk v0.0.0-20250826102719-940ad1dfb5de github.com/pkg/errors v0.9.1 + github.com/sfcompute/nodes-go v0.1.0-alpha.4 github.com/stretchr/testify v1.11.1 golang.org/x/crypto v0.47.0 golang.org/x/text v0.33.0 @@ -83,6 +84,10 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/afero v1.15.0 // indirect github.com/spf13/pflag v1.0.10 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect github.com/x448/float16 v0.8.4 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect diff --git a/go.sum b/go.sum index 5c23c5c..443dd04 100644 --- a/go.sum +++ b/go.sum @@ -160,6 +160,9 @@ 
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7D github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/sfcompute/nodes-go v0.1.0-alpha.3/go.mod h1:dF3O8MCxLz3FTVYhjCa876Z9O3EAM8E8fONivDpfmkM= +github.com/sfcompute/nodes-go v0.1.0-alpha.4 h1:oFBWcMPSpqLYm/NDs5I1jTvzgx9rsXDL9Ghsm30Hc0Q= +github.com/sfcompute/nodes-go v0.1.0-alpha.4/go.mod h1:nUviHgK+Fgt2hDFcRL3M8VoyiypC8fc0dsY8C30QU8M= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= @@ -180,6 +183,16 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 
h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/v1/providers/sfcompute/capabilities.go b/v1/providers/sfcompute/capabilities.go new file mode 100644 index 0000000..ac0604a --- /dev/null +++ b/v1/providers/sfcompute/capabilities.go @@ -0,0 +1,24 @@ +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/v1" +) + +func getSFCCapabilities() v1.Capabilities { + return v1.Capabilities{ + v1.CapabilityCreateInstance, + v1.CapabilityTerminateInstance, + v1.CapabilityCreateTerminateInstance, + // add others supported by your provider: reboot, stop/start, machine-image, tags, resize-volume, modify-firewall, etc. + } +} + +func (c *SFCClient) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return getSFCCapabilities(), nil +} + +func (c *SFCCredential) GetCapabilities(_ context.Context) (v1.Capabilities, error) { + return getSFCCapabilities(), nil +} diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go new file mode 100644 index 0000000..fce08ca --- /dev/null +++ b/v1/providers/sfcompute/client.go @@ -0,0 +1,73 @@ +package v1 + +import ( + "context" + + v1 "github.com/brevdev/cloud/v1" + "github.com/sfcompute/nodes-go/option" + + sfcnodes "github.com/sfcompute/nodes-go" +) + +type SFCCredential struct { + RefID string + apiKey string `json:"api_key"` +} + +var _ v1.CloudCredential = &SFCCredential{} + +func NewSFCCredential(refID string, apiKey string /* auth fields */) *SFCCredential { + return &SFCCredential{ + RefID: refID, + apiKey: apiKey, + // ... 
+ } +} + +func (c *SFCCredential) GetReferenceID() string { return c.RefID } +func (c *SFCCredential) GetAPIType() v1.APIType { return v1.APITypeLocational /* or v1.APITypeGlobal */ } +func (c *SFCCredential) GetCloudProviderID() v1.CloudProviderID { + return "sfcompute" // e.g., "lambdalabs" +} +func (c *SFCCredential) GetTenantID() (string, error) { + // sfc does not have a tenant system, return empty string + return "", nil +} + +func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { + // Create a client configured for a given location if locational API + return NewSFCClient(c.RefID, c.apiKey /* auth fields */).MakeClient(ctx, location) +} + +// ---------------- Client ---------------- + +type SFCClient struct { + v1.NotImplCloudClient + refID string + location string + apiKey string + client sfcnodes.Client // Add this field + // add http/sdk client fields, base URLs, etc. +} + +var _ v1.CloudClient = &SFCClient{} + +func NewSFCClient(refID string, apiKey string /* auth fields */) *SFCClient { + return &SFCClient{ + refID: refID, + apiKey: apiKey, + client: sfcnodes.NewClient( + option.WithBearerToken(apiKey)), + // init http/sdk clients here + } +} + +func (c *SFCClient) GetAPIType() v1.APIType { return v1.APITypeGlobal /* or Global */ } +func (c *SFCClient) GetCloudProviderID() v1.CloudProviderID { return "sfcompute" } +func (c *SFCClient) GetReferenceID() string { return c.refID } +func (c *SFCClient) GetTenantID() (string, error) { return "", nil } + +func (c *SFCClient) MakeClient(_ context.Context, location string) (v1.CloudClient, error) { + c.location = location + return c, nil +} diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go new file mode 100644 index 0000000..4658eee --- /dev/null +++ b/v1/providers/sfcompute/instance.go @@ -0,0 +1,159 @@ +package v1 + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + "time" + + v1 "github.com/brevdev/cloud/v1" + sfcnodes 
"github.com/sfcompute/nodes-go" + "github.com/sfcompute/nodes-go/packages/param" +) + +// define function to convert string to b64 +func toBase64(s string) string { + return base64.StdEncoding.EncodeToString([]byte(s)) +} + +// define function to add ssh key to cloud init +func sshKeyCloudInit(sshKey string) string { + return toBase64(fmt.Sprintf("#cloud-config\nssh_authorized_keys:\n - %s", sshKey)) +} + +func mapSFCStatus(s string) v1.LifecycleStatus { + switch strings.ToLower(s) { + case "pending", "nodefailure", "unspecified", "awaitingcapacity", "unknown", "failed": + return v1.LifecycleStatusPending + case "running": + return v1.LifecycleStatusRunning + // case "stopping": + //return v1.LifecycleStatusStopping + case "stopped": + return v1.LifecycleStatusStopped + case "terminating", "released": + return v1.LifecycleStatusTerminating + case "destroyed", "deleted": + return v1.LifecycleStatusTerminated + default: + return v1.LifecycleStatusPending + } +} + +func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { + resp, err := c.client.Nodes.New(ctx, sfcnodes.NodeNewParams{ + CreateNodesRequest: sfcnodes.CreateNodesRequestParam{ + DesiredCount: 1, + MaxPricePerNodeHour: 1600, + Zone: attrs.Location, + ImageID: param.Opt[string]{Value: attrs.ImageID}, //this needs to point to a valid image + CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, // encode ssh key to b64-wrapped cloud-init script + }, + }) + if err != nil { + return nil, err + } + + if len(resp.Data) == 0 { + return nil, fmt.Errorf("no nodes returned") + } + node := resp.Data[0] + + inst := &v1.Instance{ + Name: attrs.Name, + RefID: attrs.RefID, + CloudCredRefID: c.refID, + CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID + ImageID: attrs.ImageID, + InstanceType: attrs.InstanceType, + Location: attrs.Location, + CreatedAt: time.Now(), + Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, 
// map SDK status to our lifecycle + InstanceTypeID: v1.InstanceTypeID(node.GPUType), + SSHPort: 2222, // we use 2222/tcp for all of our SSH ports + } + + return inst, nil +} + +func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { + node, err := c.client.Nodes.Get(ctx, string(id)) + if err != nil { + panic(err.Error()) + } + var vmID string + if len(node.VMs.Data) > 0 { + vmID = node.VMs.Data[0].ID + fmt.Println(vmID) + } + + ssh, err := c.client.VMs.SSH(ctx, sfcnodes.VMSSHParams{VMID: vmID}) + if err != nil { + panic(err.Error()) + } + + inst := &v1.Instance{ + Name: node.Name, + RefID: c.refID, + CloudCredRefID: c.refID, + CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID + PublicIP: ssh.SSHHostname, + CreatedAt: time.Unix(node.CreatedAt, 0), + Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, // map SDK status to our lifecycle + InstanceTypeID: v1.InstanceTypeID(node.GPUType), + } + return inst, nil +} + +func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, error) { + resp, err := c.client.Nodes.List(ctx, sfcnodes.NodeListParams{}) + if err != nil { + return nil, err + } + + var instances []v1.Instance + for _, node := range resp.Data { + inst, err := c.GetInstance(ctx, v1.CloudProviderInstanceID(node.ID)) + if err != nil { + return nil, err + } + if inst != nil { + instances = append(instances, *inst) + } + } + return instances, nil +} + +func (c *SFCClient) TerminateInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + // release the node first + _, errRelease := c.client.Nodes.Release(ctx, string(id)) + if errRelease != nil { + panic(errRelease.Error()) + } + // then delete the node + errDelete := c.client.Nodes.Delete(ctx, string(id)) + if errDelete != nil { + panic(errDelete.Error()) + } + return nil +} + +// Optional if supported: +func (c *SFCClient) RebootInstance(ctx context.Context, id 
v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} +func (c *SFCClient) StopInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} +func (c *SFCClient) StartInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + return fmt.Errorf("not implemented") +} + +// Merge strategies (pass-through is acceptable baseline). +func (c *SFCClient) MergeInstanceForUpdate(_ v1.Instance, newInst v1.Instance) v1.Instance { + return newInst +} +func (c *SFCClient) MergeInstanceTypeForUpdate(_ v1.InstanceType, newIt v1.InstanceType) v1.InstanceType { + return newIt +} diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go new file mode 100644 index 0000000..a068f63 --- /dev/null +++ b/v1/providers/sfcompute/instancetype.go @@ -0,0 +1,101 @@ +package v1 + +import ( + "context" + "fmt" + "slices" + "strconv" + "time" + + "github.com/bojanz/currency" + "github.com/brevdev/cloud/internal/collections" + + v1 "github.com/brevdev/cloud/v1" +) + +func (c *SFCClient) getInstanceTypeID(region string) string { + return fmt.Sprintf("h100v_%v", region) +} + +func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTypeArgs) ([]v1.InstanceType, error) { + resp, err := c.client.Zones.List(ctx) + if err != nil { + return nil, err + } + + types := make([]v1.InstanceType, 0) + for _, zone := range resp.Data { + if len(args.Locations) > 0 && !args.Locations.IsAllowed(zone.Name) { + continue + } + var available = false + if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" { + available = true + } + + price, _ := currency.NewAmount(strconv.Itoa(2), "USD") + types = append(types, v1.InstanceType{ + ID: v1.InstanceTypeID(c.getInstanceTypeID(zone.Name)), + IsAvailable: available, + Type: "h100v", + Location: zone.Name, + Stoppable: false, + Rebootable: false, + IsContainer: false, + BasePrice: &price, + EstimatedDeployTime: 
collections.Ptr(time.Duration(15 * time.Minute)), + SupportedGPUs: []v1.GPU{{ + Count: 8, + Type: "h100v", + Manufacturer: "nvidia", + Name: "h100v", + MemoryBytes: v1.NewBytes(80, v1.Gibibyte), + }}, + }) + + } + + if len(args.InstanceTypes) > 0 { + filteredTypes := make([]v1.InstanceType, 0) + for _, t := range types { + if slices.Contains(args.InstanceTypes, t.Type) { + filteredTypes = append(filteredTypes, t) + } + } + return filteredTypes, nil + } + + return types, nil +} + +func (c *SFCClient) GetLocations(ctx context.Context, _ v1.GetLocationsArgs) ([]v1.Location, error) { + resp, err := c.client.Zones.List(ctx) + if err != nil { + return nil, err + } + locations := make(map[string]v1.Location) + allowedZones := []string{"hayesvalley", "yerba"} + if resp != nil { + for _, zone := range resp.Data { + var available = false + if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" && slices.Contains(allowedZones, zone.Name) == true { + available = true + locations[zone.Name] = v1.Location{ + Name: zone.Name, + Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), + Available: available} + } else { + available = false + locations[zone.Name] = v1.Location{ + Name: zone.Name, + Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), + Available: false} + } + } + } + availableLocations := []v1.Location{} + for _, location := range locations { + availableLocations = append(availableLocations, location) + } + return availableLocations, nil +} diff --git a/v1/providers/sfcompute/validation_test.go b/v1/providers/sfcompute/validation_test.go new file mode 100644 index 0000000..a8e0dd2 --- /dev/null +++ b/v1/providers/sfcompute/validation_test.go @@ -0,0 +1,38 @@ +package v1 + +import ( + "os" + "testing" + + "github.com/brevdev/cloud/internal/validation" + v1 "github.com/brevdev/cloud/v1" +) + +func TestValidationFunctions(t *testing.T) { + checkSkip(t) + apiKey := getAPIKey() + + config := validation.ProviderConfig{ 
+ Credential: NewSFCCredential("validation-test", apiKey), + StableIDs: []v1.InstanceTypeID{ + "h100v_hayesvalley", + "h100v_yerba", + }, + } + + validation.RunValidationSuite(t, config) +} + +func checkSkip(t *testing.T) { + apiKey := getAPIKey() + isValidation := os.Getenv("VALIDATION_TEST") + if apiKey == "" && isValidation != "true" { + t.Fatal("SFCOMPUTE_API_KEY not set, but VALIDATION_TEST is set") + } else if apiKey == "" && isValidation == "false" { + t.Skip("SFCOMPUTE_API_KEY not set, skipping sfcompute validation tests") + } +} + +func getAPIKey() string { + return os.Getenv("SFCOMPUTE_API_KEY") +} From c29dd5815dc62ebfc5b9fb17b5e5773d4bdd7612 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Wed, 4 Feb 2026 16:09:23 -0800 Subject: [PATCH 02/17] make lint --- v1/providers/sfcompute/client.go | 7 ++++--- v1/providers/sfcompute/instance.go | 24 +++++++++++++++--------- v1/providers/sfcompute/instancetype.go | 18 ++++++++++-------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go index fce08ca..3885035 100644 --- a/v1/providers/sfcompute/client.go +++ b/v1/providers/sfcompute/client.go @@ -11,7 +11,7 @@ import ( type SFCCredential struct { RefID string - apiKey string `json:"api_key"` + APIKey string `json:"api_key"` } var _ v1.CloudCredential = &SFCCredential{} @@ -19,7 +19,7 @@ var _ v1.CloudCredential = &SFCCredential{} func NewSFCCredential(refID string, apiKey string /* auth fields */) *SFCCredential { return &SFCCredential{ RefID: refID, - apiKey: apiKey, + APIKey: apiKey, // ... 
} } @@ -29,6 +29,7 @@ func (c *SFCCredential) GetAPIType() v1.APIType { return v1.APITypeLocational /* func (c *SFCCredential) GetCloudProviderID() v1.CloudProviderID { return "sfcompute" // e.g., "lambdalabs" } + func (c *SFCCredential) GetTenantID() (string, error) { // sfc does not have a tenant system, return empty string return "", nil @@ -36,7 +37,7 @@ func (c *SFCCredential) GetTenantID() (string, error) { func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { // Create a client configured for a given location if locational API - return NewSFCClient(c.RefID, c.apiKey /* auth fields */).MakeClient(ctx, location) + return NewSFCClient(c.RefID, c.APIKey /* auth fields */).MakeClient(ctx, location) } // ---------------- Client ---------------- diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 4658eee..ad3e7b6 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -29,7 +29,7 @@ func mapSFCStatus(s string) v1.LifecycleStatus { case "running": return v1.LifecycleStatusRunning // case "stopping": - //return v1.LifecycleStatusStopping + // return v1.LifecycleStatusStopping case "stopped": return v1.LifecycleStatusStopped case "terminating", "released": @@ -47,7 +47,7 @@ func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceA DesiredCount: 1, MaxPricePerNodeHour: 1600, Zone: attrs.Location, - ImageID: param.Opt[string]{Value: attrs.ImageID}, //this needs to point to a valid image + ImageID: param.Opt[string]{Value: attrs.ImageID}, // this needs to point to a valid image CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, // encode ssh key to b64-wrapped cloud-init script }, }) @@ -106,7 +106,7 @@ func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstance return inst, nil } -func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, 
error) { +func (c *SFCClient) ListInstances(ctx context.Context, _ v1.ListInstancesArgs) ([]v1.Instance, error) { resp, err := c.client.Nodes.List(ctx, sfcnodes.NodeListParams{}) if err != nil { return nil, err @@ -122,6 +122,9 @@ func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs instances = append(instances, *inst) } } + + // TODO: filter by args + return instances, nil } @@ -140,20 +143,23 @@ func (c *SFCClient) TerminateInstance(ctx context.Context, id v1.CloudProviderIn } // Optional if supported: -func (c *SFCClient) RebootInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { - return fmt.Errorf("not implemented") +func (c *SFCClient) RebootInstance(_ context.Context, _ v1.CloudProviderInstanceID) error { + return v1.ErrNotImplemented } -func (c *SFCClient) StopInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { - return fmt.Errorf("not implemented") + +func (c *SFCClient) StopInstance(_ context.Context, _ v1.CloudProviderInstanceID) error { + return v1.ErrNotImplemented } -func (c *SFCClient) StartInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { - return fmt.Errorf("not implemented") + +func (c *SFCClient) StartInstance(_ context.Context, _ v1.CloudProviderInstanceID) error { + return v1.ErrNotImplemented } // Merge strategies (pass-through is acceptable baseline). 
func (c *SFCClient) MergeInstanceForUpdate(_ v1.Instance, newInst v1.Instance) v1.Instance { return newInst } + func (c *SFCClient) MergeInstanceTypeForUpdate(_ v1.InstanceType, newIt v1.InstanceType) v1.InstanceType { return newIt } diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go index a068f63..c6d8406 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -8,7 +8,6 @@ import ( "time" "github.com/bojanz/currency" - "github.com/brevdev/cloud/internal/collections" v1 "github.com/brevdev/cloud/v1" ) @@ -28,12 +27,14 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp if len(args.Locations) > 0 && !args.Locations.IsAllowed(zone.Name) { continue } - var available = false + available := false if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" { available = true } price, _ := currency.NewAmount(strconv.Itoa(2), "USD") + estimatedDeployTime := 15 * time.Minute + types = append(types, v1.InstanceType{ ID: v1.InstanceTypeID(c.getInstanceTypeID(zone.Name)), IsAvailable: available, @@ -43,7 +44,7 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp Rebootable: false, IsContainer: false, BasePrice: &price, - EstimatedDeployTime: collections.Ptr(time.Duration(15 * time.Minute)), + EstimatedDeployTime: &estimatedDeployTime, SupportedGPUs: []v1.GPU{{ Count: 8, Type: "h100v", @@ -52,7 +53,6 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp MemoryBytes: v1.NewBytes(80, v1.Gibibyte), }}, }) - } if len(args.InstanceTypes) > 0 { @@ -77,19 +77,21 @@ func (c *SFCClient) GetLocations(ctx context.Context, _ v1.GetLocationsArgs) ([] allowedZones := []string{"hayesvalley", "yerba"} if resp != nil { for _, zone := range resp.Data { - var available = false - if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" && slices.Contains(allowedZones, zone.Name) == true { + available := false + 
if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" && slices.Contains(allowedZones, zone.Name) { available = true locations[zone.Name] = v1.Location{ Name: zone.Name, Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), - Available: available} + Available: available, + } } else { available = false locations[zone.Name] = v1.Location{ Name: zone.Name, Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), - Available: false} + Available: false, + } } } } From a6d410cab8e19a0c81f312fe26e0249608bed18e Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Wed, 4 Feb 2026 16:15:36 -0800 Subject: [PATCH 03/17] run sfcompute in workflow --- .github/workflows/validation-sfcompute.yml | 59 ++++++++++++++++++++++ v1/providers/sfcompute/validation_test.go | 14 +++++ 2 files changed, 73 insertions(+) create mode 100644 .github/workflows/validation-sfcompute.yml diff --git a/.github/workflows/validation-sfcompute.yml b/.github/workflows/validation-sfcompute.yml new file mode 100644 index 0000000..4813366 --- /dev/null +++ b/.github/workflows/validation-sfcompute.yml @@ -0,0 +1,59 @@ +name: SFCompute Validation Tests + +on: + schedule: + # Run daily at 2 AM UTC + - cron: '0 2 * * *' + workflow_dispatch: + # Allow manual triggering + pull_request: + paths: + - 'v1/providers/sfcompute/**' + - 'internal/validation/**' + - 'v1/**' + branches: [ main ] + +jobs: + sfcompute-validation: + name: SFCompute Provider Validation + runs-on: ubuntu-latest + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version-file: 'go.mod' + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Install dependencies + run: 
make deps + + - name: Run SFCompute validation tests + env: + SFCOMPUTE_API_KEY: ${{ secrets.SFCOMPUTE_API_KEY }} + TEST_PRIVATE_KEY_BASE64: ${{ secrets.TEST_PRIVATE_KEY_BASE64 }} + TEST_PUBLIC_KEY_BASE64: ${{ secrets.TEST_PUBLIC_KEY_BASE64 }} + VALIDATION_TEST: true + run: | + cd v1/providers/sfcompute + go test -v -short=false -timeout=30m ./... + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: sfcompute-validation-results + path: | + v1/providers/sfcompute/coverage.out diff --git a/v1/providers/sfcompute/validation_test.go b/v1/providers/sfcompute/validation_test.go index a8e0dd2..6f86f49 100644 --- a/v1/providers/sfcompute/validation_test.go +++ b/v1/providers/sfcompute/validation_test.go @@ -9,6 +9,7 @@ import ( ) func TestValidationFunctions(t *testing.T) { + t.Parallel() checkSkip(t) apiKey := getAPIKey() @@ -23,6 +24,19 @@ func TestValidationFunctions(t *testing.T) { validation.RunValidationSuite(t, config) } +func TestInstanceLifecycleValidation(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + config := validation.ProviderConfig{ + Credential: NewSFCCredential("validation-test", apiKey), + Location: "eu-north1", + } + + validation.RunInstanceLifecycleValidation(t, config) +} + func checkSkip(t *testing.T) { apiKey := getAPIKey() isValidation := os.Getenv("VALIDATION_TEST") From 096d7011f10215305d1ea922af408e95edd4d09e Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 14:16:44 -0800 Subject: [PATCH 04/17] fix --- v1/instancetype.go | 32 +++ v1/providers/launchpad/instancetype.go | 36 +-- v1/providers/sfcompute/client.go | 69 +++-- v1/providers/sfcompute/instance.go | 290 ++++++++++++++++---- v1/providers/sfcompute/instancetype.go | 213 +++++++++----- v1/providers/sfcompute/instancetype_test.go | 166 +++++++++++ v1/providers/sfcompute/validation_test.go | 6 +- 7 files changed, 637 insertions(+), 175 deletions(-) create mode 100644 
v1/providers/sfcompute/instancetype_test.go diff --git a/v1/instancetype.go b/v1/instancetype.go index 3c09440..5401a76 100644 --- a/v1/instancetype.go +++ b/v1/instancetype.go @@ -439,3 +439,35 @@ func ValidateStableInstanceTypeIDs(ctx context.Context, client CloudInstanceType return nil } + +func IsSelectedByArgs(instanceType InstanceType, args GetInstanceTypeArgs) bool { + if args.Locations != nil { + if !args.Locations.IsAllowed(instanceType.Location) { + return false + } + } + + if args.GPUManufactererFilter != nil { + for _, supportedGPU := range instanceType.SupportedGPUs { + if !args.GPUManufactererFilter.IsAllowed(supportedGPU.Manufacturer) { + return false + } + } + } + + if args.CloudFilter != nil { + if !args.CloudFilter.IsAllowed(instanceType.Cloud) { + return false + } + } + + if args.ArchitectureFilter != nil { + for _, architecture := range instanceType.SupportedArchitectures { + if !args.ArchitectureFilter.IsAllowed(architecture) { + return false + } + } + } + + return true +} diff --git a/v1/providers/launchpad/instancetype.go b/v1/providers/launchpad/instancetype.go index ca9b317..a66f941 100644 --- a/v1/providers/launchpad/instancetype.go +++ b/v1/providers/launchpad/instancetype.go @@ -44,7 +44,7 @@ func (c *LaunchpadClient) GetInstanceTypes(ctx context.Context, args v1.GetInsta } // Collect the instance type if it is selected by the args - if isSelectedByArgs(*instanceType, args) { + if v1.IsSelectedByArgs(*instanceType, args) { instanceTypes = append(instanceTypes, *instanceType) } else { continue @@ -55,40 +55,6 @@ func (c *LaunchpadClient) GetInstanceTypes(ctx context.Context, args v1.GetInsta return instanceTypes, nil } -func isSelectedByArgs(instanceType v1.InstanceType, args v1.GetInstanceTypeArgs) bool { - if args.Locations != nil { - for _, location := range instanceType.Location { - if !args.Locations.IsAllowed(string(location)) { - return false - } - } - } - - if args.GPUManufactererFilter != nil { - for _, supportedGPU := range 
instanceType.SupportedGPUs { - if !args.GPUManufactererFilter.IsAllowed(supportedGPU.Manufacturer) { - return false - } - } - } - - if args.CloudFilter != nil { - if !args.CloudFilter.IsAllowed(instanceType.Cloud) { - return false - } - } - - if args.ArchitectureFilter != nil { - for _, architecture := range instanceType.SupportedArchitectures { - if !args.ArchitectureFilter.IsAllowed(architecture) { - return false - } - } - } - - return true -} - func (c *LaunchpadClient) paginateInstanceTypes(ctx context.Context, pageSize int32) ([]openapi.InstanceType, error) { instanceTypes := make([]openapi.InstanceType, 0, pageSize) var page int32 = 1 diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go index 3885035..27f16d4 100644 --- a/v1/providers/sfcompute/client.go +++ b/v1/providers/sfcompute/client.go @@ -9,6 +9,8 @@ import ( sfcnodes "github.com/sfcompute/nodes-go" ) +const CloudProviderID = "sfcompute" + type SFCCredential struct { RefID string APIKey string `json:"api_key"` @@ -16,18 +18,23 @@ type SFCCredential struct { var _ v1.CloudCredential = &SFCCredential{} -func NewSFCCredential(refID string, apiKey string /* auth fields */) *SFCCredential { +func NewSFCCredential(refID string, apiKey string) *SFCCredential { return &SFCCredential{ RefID: refID, APIKey: apiKey, - // ... 
} } -func (c *SFCCredential) GetReferenceID() string { return c.RefID } -func (c *SFCCredential) GetAPIType() v1.APIType { return v1.APITypeLocational /* or v1.APITypeGlobal */ } +func (c *SFCCredential) GetReferenceID() string { + return c.RefID +} + +func (c *SFCCredential) GetAPIType() v1.APIType { + return v1.APITypeGlobal +} + func (c *SFCCredential) GetCloudProviderID() v1.CloudProviderID { - return "sfcompute" // e.g., "lambdalabs" + return CloudProviderID } func (c *SFCCredential) GetTenantID() (string, error) { @@ -36,37 +43,59 @@ func (c *SFCCredential) GetTenantID() (string, error) { } func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { - // Create a client configured for a given location if locational API - return NewSFCClient(c.RefID, c.APIKey /* auth fields */).MakeClient(ctx, location) + return NewSFCClient(c.RefID, c.APIKey).MakeClient(ctx, location) } -// ---------------- Client ---------------- - type SFCClient struct { v1.NotImplCloudClient refID string location string apiKey string - client sfcnodes.Client // Add this field - // add http/sdk client fields, base URLs, etc. 
+ client sfcnodes.Client + logger v1.Logger } var _ v1.CloudClient = &SFCClient{} -func NewSFCClient(refID string, apiKey string /* auth fields */) *SFCClient { - return &SFCClient{ +type SFCClientOption func(c *SFCClient) + +func WithLogger(logger v1.Logger) SFCClientOption { + return func(c *SFCClient) { + c.logger = logger + } +} + +func NewSFCClient(refID string, apiKey string, opts ...SFCClientOption) *SFCClient { + sfcClient := &SFCClient{ refID: refID, apiKey: apiKey, - client: sfcnodes.NewClient( - option.WithBearerToken(apiKey)), - // init http/sdk clients here + client: sfcnodes.NewClient(option.WithBearerToken(apiKey)), + logger: &v1.NoopLogger{}, + } + + for _, opt := range opts { + opt(sfcClient) } + + return sfcClient +} + +func (c *SFCClient) GetAPIType() v1.APIType { + return v1.APITypeGlobal +} + +func (c *SFCClient) GetCloudProviderID() v1.CloudProviderID { + return CloudProviderID } -func (c *SFCClient) GetAPIType() v1.APIType { return v1.APITypeGlobal /* or Global */ } -func (c *SFCClient) GetCloudProviderID() v1.CloudProviderID { return "sfcompute" } -func (c *SFCClient) GetReferenceID() string { return c.refID } -func (c *SFCClient) GetTenantID() (string, error) { return "", nil } +func (c *SFCClient) GetReferenceID() string { + return c.refID +} + +func (c *SFCClient) GetTenantID() (string, error) { + // sfc does not have a tenant system, return empty string + return "", nil +} func (c *SFCClient) MakeClient(_ context.Context, location string) (v1.CloudClient, error) { c.location = location diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index ad3e7b6..a1418bf 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -4,14 +4,21 @@ import ( "context" "encoding/base64" "fmt" + "slices" "strings" "time" + "github.com/brevdev/cloud/internal/errors" v1 "github.com/brevdev/cloud/v1" sfcnodes "github.com/sfcompute/nodes-go" "github.com/sfcompute/nodes-go/packages/param" ) +const 
( + maxPricePerNodeHour = 1600 + defaultPort = 2222 +) + // define function to convert string to b64 func toBase64(s string) string { return base64.StdEncoding.EncodeToString([]byte(s)) @@ -42,106 +49,285 @@ func mapSFCStatus(s string) v1.LifecycleStatus { } func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { + // Get the zone for the location (do not include unavailable zones) + zone, err := c.getZone(ctx, attrs.Location, false) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + + // Create a name for the node + name := brevDataToSFCName(attrs.RefID, attrs.Name) + + // Create the node resp, err := c.client.Nodes.New(ctx, sfcnodes.NodeNewParams{ CreateNodesRequest: sfcnodes.CreateNodesRequestParam{ DesiredCount: 1, - MaxPricePerNodeHour: 1600, - Zone: attrs.Location, - ImageID: param.Opt[string]{Value: attrs.ImageID}, // this needs to point to a valid image + MaxPricePerNodeHour: maxPricePerNodeHour, + Zone: zone.Name, + Names: []string{name}, CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, // encode ssh key to b64-wrapped cloud-init script }, }) if err != nil { - return nil, err + return nil, errors.WrapAndTrace(err) } - if len(resp.Data) == 0 { - return nil, fmt.Errorf("no nodes returned") + return nil, errors.WrapAndTrace(fmt.Errorf("no nodes returned")) } node := resp.Data[0] - inst := &v1.Instance{ - Name: attrs.Name, - RefID: attrs.RefID, - CloudCredRefID: c.refID, - CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID - ImageID: attrs.ImageID, - InstanceType: attrs.InstanceType, - Location: attrs.Location, - CreatedAt: time.Now(), - Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, // map SDK status to our lifecycle - InstanceTypeID: v1.InstanceTypeID(node.GPUType), - SSHPort: 2222, // we use 2222/tcp for all of our SSH ports + // Get the instance + instance, err := c.GetInstance(ctx, v1.CloudProviderInstanceID(node.ID)) + if err 
!= nil { + return nil, errors.WrapAndTrace(err) } - return inst, nil + return instance, nil } func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { + // Get the node from the API node, err := c.client.Nodes.Get(ctx, string(id)) if err != nil { - panic(err.Error()) + return nil, errors.WrapAndTrace(err) } - var vmID string - if len(node.VMs.Data) > 0 { - vmID = node.VMs.Data[0].ID - fmt.Println(vmID) + + // Get the zone for the location (include unavailable zones, in case the zone is not available but the node is still running) + zone, err := c.getZone(ctx, node.Zone, true) + if err != nil { + return nil, errors.WrapAndTrace(err) } - ssh, err := c.client.VMs.SSH(ctx, sfcnodes.VMSSHParams{VMID: vmID}) + nodeInfo, err := c.sfcNodeInfoFromNode(ctx, node, zone) if err != nil { - panic(err.Error()) + return nil, errors.WrapAndTrace(err) } - inst := &v1.Instance{ - Name: node.Name, - RefID: c.refID, - CloudCredRefID: c.refID, - CloudID: v1.CloudProviderInstanceID(node.ID), // SFC ID - PublicIP: ssh.SSHHostname, - CreatedAt: time.Unix(node.CreatedAt, 0), - Status: v1.Status{LifecycleStatus: mapSFCStatus(fmt.Sprint(node.Status))}, // map SDK status to our lifecycle - InstanceTypeID: v1.InstanceTypeID(node.GPUType), + instance, err := c.sfcNodeToBrevInstance(*nodeInfo) + if err != nil { + return nil, errors.WrapAndTrace(err) } - return inst, nil + return instance, nil } -func (c *SFCClient) ListInstances(ctx context.Context, _ v1.ListInstancesArgs) ([]v1.Instance, error) { +func (c *SFCClient) getZone(ctx context.Context, location string, includeUnavailable bool) (*sfcnodes.ZoneListResponseData, error) { + // Fetch the zones to ensure the location is valid + zones, err := c.getZones(ctx, includeUnavailable) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + if len(zones) == 0 { + return nil, errors.WrapAndTrace(fmt.Errorf("no zones available")) + } + + // Find the zone that matches the location + var zone 
*sfcnodes.ZoneListResponseData + for _, z := range zones { + if z.Name == location { + zone = &z + break + } + } + if zone == nil { + return nil, errors.WrapAndTrace(fmt.Errorf("zone not found in location %s", location)) + } + + return zone, nil +} + +func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, error) { resp, err := c.client.Nodes.List(ctx, sfcnodes.NodeListParams{}) if err != nil { return nil, err } + zoneCache := make(map[string]*sfcnodes.ZoneListResponseData) + var instances []v1.Instance for _, node := range resp.Data { - inst, err := c.GetInstance(ctx, v1.CloudProviderInstanceID(node.ID)) + // Get the zone for the node, checking the cache first + zone, ok := zoneCache[node.Zone] + if !ok { + z, err := c.getZone(ctx, node.Zone, true) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + zoneCache[node.Zone] = z + zone = z + } + + // Filter by locations + if args.Locations != nil && !args.Locations.IsAllowed(zone.Name) { + continue + } + + // Filter by instance IDs + if args.InstanceIDs != nil && !slices.Contains(args.InstanceIDs, v1.CloudProviderInstanceID(node.ID)) { + continue + } + + nodeInfo, err := c.sfcNodeInfoFromNodeListResponseData(ctx, &node, zone) if err != nil { - return nil, err + return nil, errors.WrapAndTrace(err) } - if inst != nil { - instances = append(instances, *inst) + + inst, err := c.sfcNodeToBrevInstance(*nodeInfo) + if err != nil { + return nil, errors.WrapAndTrace(err) } + instances = append(instances, *inst) } - // TODO: filter by args - return instances, nil } func (c *SFCClient) TerminateInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { - // release the node first - _, errRelease := c.client.Nodes.Release(ctx, string(id)) - if errRelease != nil { - panic(errRelease.Error()) - } - // then delete the node - errDelete := c.client.Nodes.Delete(ctx, string(id)) - if errDelete != nil { - panic(errDelete.Error()) + _, err := c.client.Nodes.Release(ctx, 
string(id)) + if err != nil { + return errors.WrapAndTrace(err) } return nil } +type sfcNodeInfo struct { + id string + name string + createdAt time.Time + status v1.LifecycleStatus + gpuType string + sshUsername string + sshHostname string + zone *sfcnodes.ZoneListResponseData +} + +func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error) { + // Get the refID and name from the node name + refID, name, err := sfcNameToBrevData(node.name) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + + // Get the instance type for the zone + instanceType := getInstanceTypeForZone(*node.zone) + + // Create the instance + inst := &v1.Instance{ + Name: name, + CloudID: v1.CloudProviderInstanceID(node.id), + RefID: refID, + PublicDNS: node.sshHostname, + PublicIP: node.sshHostname, + SSHUser: node.sshUsername, + SSHPort: defaultPort, + CreatedAt: node.createdAt, + DiskSizeBytes: instanceType.SupportedStorage[0].SizeBytes, // TODO: this should be pulled from the node iteself + Status: v1.Status{ + LifecycleStatus: node.status, + }, + InstanceTypeID: instanceType.ID, + InstanceType: instanceType.Type, + Location: node.zone.Name, + Spot: false, + Stoppable: false, + Rebootable: false, + CloudCredRefID: c.refID, // TODO: this should be pulled from the node iteself + } + return inst, nil +} + +func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node, zone *sfcnodes.ZoneListResponseData) (*sfcNodeInfo, error) { + var sshUsername string + var sshHostname string + + if len(node.VMs.Data) == 1 { + username, hostname, err := c.getSSHDetailsFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + sshUsername = username + sshHostname = hostname + } else if len(node.VMs.Data) <= 0 { + sshUsername = "" + sshHostname = "" + } else { + return nil, errors.WrapAndTrace(fmt.Errorf("multiple VMs found for node %s", node.ID)) + } + + return &sfcNodeInfo{ + id: node.ID, + name: 
node.Name, + createdAt: time.Unix(node.CreatedAt, 0), + status: mapSFCStatus(fmt.Sprint(node.Status)), + gpuType: string(node.GPUType), + sshUsername: sshUsername, + sshHostname: sshHostname, + zone: zone, + }, nil +} + +func (c *SFCClient) sfcNodeInfoFromNodeListResponseData(ctx context.Context, node *sfcnodes.ListResponseNodeData, zone *sfcnodes.ZoneListResponseData) (*sfcNodeInfo, error) { + var sshUsername string + var sshHostname string + + if len(node.VMs.Data) == 1 { + username, hostname, err := c.getSSHDetailsFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) + if err != nil { + return nil, errors.WrapAndTrace(err) + } + sshUsername = username + sshHostname = hostname + } else if len(node.VMs.Data) == 0 { + sshUsername = "" + sshHostname = "" + } else { + return nil, errors.WrapAndTrace(fmt.Errorf("multiple VMs found for node %s", node.ID)) + } + + return &sfcNodeInfo{ + id: node.ID, + name: node.Name, + createdAt: time.Unix(node.CreatedAt, 0), + status: mapSFCStatus(fmt.Sprint(node.Status)), + gpuType: string(node.GPUType), + sshUsername: sshUsername, + sshHostname: sshHostname, + zone: zone, + }, nil +} + +func (c *SFCClient) getSSHDetailsFromVM(ctx context.Context, vmID string, vmStatus string) (string, string, error) { + var sshUsername string + var sshHostname string + + // If the VM is not running, set the SSH username and hostname to empty strings + if strings.ToLower(vmStatus) != "running" { + return "", "", nil + } + + // If the VM is running, get the SSH username and hostname + sshResponse, err := c.client.VMs.SSH(ctx, sfcnodes.VMSSHParams{VMID: vmID}) + if err != nil { + return "", "", errors.WrapAndTrace(err) + } + + sshUsername = "ubuntu" // TODO: ?? 
+ sshHostname = sshResponse.SSHHostname + + return sshUsername, sshHostname, nil +} + +func brevDataToSFCName(refID string, name string) string { + return fmt.Sprintf("%s_%s", refID, name) +} + +func sfcNameToBrevData(name string) (string, string, error) { + parts := strings.Split(name, "_") + if len(parts) != 2 { + return "", "", errors.WrapAndTrace(fmt.Errorf("invalid node name %s", name)) + } + return parts[0], parts[1], nil +} + // Optional if supported: func (c *SFCClient) RebootInstance(_ context.Context, _ v1.CloudProviderInstanceID) error { return v1.ErrNotImplemented diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go index c6d8406..9812dcd 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -4,100 +4,183 @@ import ( "context" "fmt" "slices" - "strconv" + "strings" "time" "github.com/bojanz/currency" + sfcnodes "github.com/sfcompute/nodes-go" v1 "github.com/brevdev/cloud/v1" ) -func (c *SFCClient) getInstanceTypeID(region string) string { - return fmt.Sprintf("h100v_%v", region) +const ( + gpuTypeH100 = "h100" + gpuTypeH200 = "h200" + + deliveryTypeVM = "VM" + interconnectInfiniband = "infiniband" +) + +var ( + allowedZones = []string{"hayesvalley", "yerba"} + + gpuToVRAM = map[string]v1.Bytes{ + gpuTypeH100: v1.NewBytes(80, v1.Gigabyte), + gpuTypeH200: v1.NewBytes(141, v1.Gigabyte), + } + gpuToFormFactor = map[string]string{ + gpuTypeH100: "sxm5", + gpuTypeH200: "sxm5", + } + gpuToArchitecture = map[string]v1.Architecture{ + gpuTypeH100: v1.ArchitectureX86_64, + gpuTypeH200: v1.ArchitectureX86_64, + } + + defaultGPUCountPerNode = int32(8) + defaultGPUManufacturer = "nvidia" + defaultRAMPerNode = v1.NewBytes(960, v1.Gigabyte) + defaultStoragePerNode = v1.NewBytes(1500, v1.Gigabyte) + defaultProvisioningTime = 5 * time.Minute + defaultPricePerGPU = makeDefaultInstanceTypePrice("2.00", "USD") +) + +func makeDefaultInstanceTypePrice(amount string, currencyCode string) 
currency.Amount { + instanceTypePrice, err := currency.NewAmount(amount, currencyCode) + if err != nil { + panic(err) + } + return instanceTypePrice } func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTypeArgs) ([]v1.InstanceType, error) { - resp, err := c.client.Zones.List(ctx) + // Fetch all available zones + includeUnavailable := false + zones, err := c.getZones(ctx, includeUnavailable) if err != nil { return nil, err } - types := make([]v1.InstanceType, 0) - for _, zone := range resp.Data { - if len(args.Locations) > 0 && !args.Locations.IsAllowed(zone.Name) { + instanceTypes := make([]v1.InstanceType, 0, len(zones)) + for _, zone := range zones { + gpuType := strings.ToLower(string(zone.HardwareType)) + + if !gpuTypeIsAllowed(gpuType) { continue } - available := false - if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" { - available = true + + instanceType := getInstanceTypeForZone(zone) + + if v1.IsSelectedByArgs(instanceType, args) { + instanceTypes = append(instanceTypes, instanceType) } + } + + return instanceTypes, nil +} + +func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) v1.InstanceType { + gpuType := strings.ToLower(string(zone.HardwareType)) - price, _ := currency.NewAmount(strconv.Itoa(2), "USD") - estimatedDeployTime := 15 * time.Minute - - types = append(types, v1.InstanceType{ - ID: v1.InstanceTypeID(c.getInstanceTypeID(zone.Name)), - IsAvailable: available, - Type: "h100v", - Location: zone.Name, - Stoppable: false, - Rebootable: false, - IsContainer: false, - BasePrice: &price, - EstimatedDeployTime: &estimatedDeployTime, - SupportedGPUs: []v1.GPU{{ - Count: 8, - Type: "h100v", - Manufacturer: "nvidia", - Name: "h100v", - MemoryBytes: v1.NewBytes(80, v1.Gibibyte), - }}, - }) + instanceType := v1.InstanceType{ + IsAvailable: true, + Type: makeInstanceTypeName(zone), + MemoryBytes: defaultRAMPerNode, + Location: zoneToLocation(zone).Name, + Stoppable: false, + Rebootable: false, + 
IsContainer: false, + Provider: CloudProviderID, + BasePrice: &defaultPricePerGPU, + EstimatedDeployTime: &defaultProvisioningTime, + SupportedGPUs: []v1.GPU{{ + Count: defaultGPUCountPerNode, + Type: gpuType, + Manufacturer: v1.GetManufacturer(defaultGPUManufacturer), + Name: gpuType, + MemoryBytes: gpuToVRAM[gpuType], + NetworkDetails: gpuToFormFactor[gpuType], + }}, + SupportedStorage: []v1.Storage{{ + Type: "ssd", + Count: 1, + SizeBytes: defaultStoragePerNode, + }}, + SupportedArchitectures: []v1.Architecture{gpuToArchitecture[gpuType]}, } - if len(args.InstanceTypes) > 0 { - filteredTypes := make([]v1.InstanceType, 0) - for _, t := range types { - if slices.Contains(args.InstanceTypes, t.Type) { - filteredTypes = append(filteredTypes, t) - } - } - return filteredTypes, nil + instanceType.ID = v1.MakeGenericInstanceTypeID(instanceType) + + return instanceType +} + +func gpuTypeIsAllowed(gpuType string) bool { + return gpuType == gpuTypeH100 || gpuType == gpuTypeH200 +} + +func makeInstanceTypeName(zone sfcnodes.ZoneListResponseData) string { + interconnect := "" + if strings.ToLower(zone.InterconnectType) == interconnectInfiniband { + interconnect = ".ib" + } + return fmt.Sprintf("%s%s", strings.ToLower(string(zone.HardwareType)), interconnect) +} + +func (c *SFCClient) GetLocations(ctx context.Context, args v1.GetLocationsArgs) ([]v1.Location, error) { + zones, err := c.getZones(ctx, args.IncludeUnavailable) + if err != nil { + return nil, err + } + + locations := make([]v1.Location, 0, len(zones)) + for _, zone := range zones { + location := zoneToLocation(zone) + locations = append(locations, location) } - return types, nil + return locations, nil } -func (c *SFCClient) GetLocations(ctx context.Context, _ v1.GetLocationsArgs) ([]v1.Location, error) { +func (c *SFCClient) getZones(ctx context.Context, includeUnavailable bool) ([]sfcnodes.ZoneListResponseData, error) { + // Fetch the zones from the API resp, err := c.client.Zones.List(ctx) if err != nil { 
return nil, err } - locations := make(map[string]v1.Location) - allowedZones := []string{"hayesvalley", "yerba"} - if resp != nil { - for _, zone := range resp.Data { - available := false - if len(zone.AvailableCapacity) > 0 && zone.DeliveryType == "VM" && slices.Contains(allowedZones, zone.Name) { - available = true - locations[zone.Name] = v1.Location{ - Name: zone.Name, - Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), - Available: available, - } - } else { - available = false - locations[zone.Name] = v1.Location{ - Name: zone.Name, - Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), - Available: false, - } - } + + // If there are no zones, return an empty list + if resp == nil || len(resp.Data) == 0 { + return []sfcnodes.ZoneListResponseData{}, nil + } + + zones := make([]sfcnodes.ZoneListResponseData, 0, len(resp.Data)) + for _, zone := range resp.Data { + // If the zone is not allowed, skip it + if !slices.Contains(allowedZones, strings.ToLower(zone.Name)) { + continue + } + + // If the there is no available capacity, and skip it + if len(zone.AvailableCapacity) == 0 && !includeUnavailable { + continue + } + + // If the delivery type is not VM, skip it + if zone.DeliveryType != deliveryTypeVM { + continue } + + // Add the zone to the list + zones = append(zones, zone) } - availableLocations := []v1.Location{} - for _, location := range locations { - availableLocations = append(availableLocations, location) + + return zones, nil +} + +func zoneToLocation(zone sfcnodes.ZoneListResponseData) v1.Location { + return v1.Location{ + Name: zone.Name, + Description: fmt.Sprintf("sfc_%s_%s", zone.Name, string(zone.HardwareType)), + Available: true, } - return availableLocations, nil } diff --git a/v1/providers/sfcompute/instancetype_test.go b/v1/providers/sfcompute/instancetype_test.go new file mode 100644 index 0000000..f313029 --- /dev/null +++ b/v1/providers/sfcompute/instancetype_test.go @@ -0,0 +1,166 @@ 
+package v1 + +// import ( +// "context" +// "fmt" +// "testing" +// "time" + +// "github.com/brevdev/cloud/internal/ssh" +// v1 "github.com/brevdev/cloud/v1" +// "github.com/google/uuid" +// ) + +// func TestGetInstanceTypes(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "eu-north1") +// if err != nil { +// t.Fatalf("failed to make client: %v", err) +// } + +// locations, err := client.GetLocations(context.Background(), v1.GetLocationsArgs{ +// IncludeUnavailable: true, +// }) +// if err != nil { +// t.Fatalf("failed to get locations: %v", err) +// } + +// t.Logf("locations: %v", locations) + +// instanceTypes, err := client.GetInstanceTypes(context.Background(), v1.GetInstanceTypeArgs{ +// Locations: v1.LocationsFilter{"all"}, +// }) +// if err != nil { +// t.Fatalf("failed to get instance types: %v", err) +// } + +// t.Logf("instance types: %v", instanceTypes) +// } + +// func TestCreateInstance(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "eu-north1") +// if err != nil { +// t.Fatalf("failed to make client: %v", err) +// } + +// id := uuid.New().String() + +// instance, err := client.CreateInstance(context.Background(), v1.CreateInstanceAttrs{ +// Name: "test", +// RefID: id, +// PublicKey: ssh.GetTestPublicKey(), +// InstanceType: "h100", +// Location: "hayesvalley", +// }) +// if err != nil { +// t.Fatalf("failed to create instance: %v", err) +// } + +// t.Logf("instance: %v", instance) +// } + +// func TestGetInstance(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "") +// if err != nil { +// 
t.Fatalf("failed to make client: %v", err) +// } + +// instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") +// if err != nil { +// t.Fatalf("failed to get instance: %v", err) +// } + +// t.Logf("instance: %v", instance) + +// // status +// t.Logf("status: %v", instance.Status) + +// // ssh details +// t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) +// } + +// func TestSSHInstance(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "") +// if err != nil { +// t.Fatalf("failed to make client: %v", err) +// } + +// instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") +// if err != nil { +// t.Fatalf("failed to get instance: %v", err) +// } + +// t.Logf("instance: %v", instance) + +// // ssh details +// t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) + +// // ssh to instance +// err = ssh.WaitForSSH(context.Background(), ssh.ConnectionConfig{ +// User: "root", +// HostPort: fmt.Sprintf("%s:%d", instance.PublicIP, instance.SSHPort), +// PrivKey: ssh.GetTestPrivateKey(), +// }, ssh.WaitForSSHOptions{ +// Timeout: 10 * time.Second, +// }) +// if err != nil { +// t.Fatalf("failed to wait for SSH: %v", err) +// } + +// t.Logf("SSH connection validated successfully for %s@%s:%d", instance.SSHUser, instance.PublicIP, instance.SSHPort) +// } + +// func TestListInstances(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "") +// if err != nil { +// t.Fatalf("failed to make client: %v", err) +// } + +// instances, err := client.ListInstances(context.Background(), v1.ListInstancesArgs{}) +// if err != nil 
{ +// t.Fatalf("failed to list instances: %v", err) +// } + +// t.Logf("instances: %v", instances) +// } + +// func TestTerminateInstance(t *testing.T) { +// t.Parallel() +// checkSkip(t) +// apiKey := getAPIKey() + +// credential := NewSFCCredential("validation-test", apiKey) +// client, err := credential.MakeClient(context.Background(), "") +// if err != nil { +// t.Fatalf("failed to make client: %v", err) +// } + +// err = client.TerminateInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") +// if err != nil { +// t.Fatalf("failed to terminate instance: %v", err) +// } +// } diff --git a/v1/providers/sfcompute/validation_test.go b/v1/providers/sfcompute/validation_test.go index 6f86f49..1dd6adc 100644 --- a/v1/providers/sfcompute/validation_test.go +++ b/v1/providers/sfcompute/validation_test.go @@ -16,8 +16,8 @@ func TestValidationFunctions(t *testing.T) { config := validation.ProviderConfig{ Credential: NewSFCCredential("validation-test", apiKey), StableIDs: []v1.InstanceTypeID{ - "h100v_hayesvalley", - "h100v_yerba", + "hayesvalley-noSub-h100", + "yerba-noSub-h100", }, } @@ -31,7 +31,7 @@ func TestInstanceLifecycleValidation(t *testing.T) { config := validation.ProviderConfig{ Credential: NewSFCCredential("validation-test", apiKey), - Location: "eu-north1", + Location: "yerba", } validation.RunInstanceLifecycleValidation(t, config) From 07dceb515b4cc930206d7905cd39546eb6b51e98 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 14:30:09 -0800 Subject: [PATCH 05/17] fix --- v1/providers/sfcompute/capabilities.go | 1 - v1/providers/sfcompute/instance.go | 58 ++++++++++++-------------- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/v1/providers/sfcompute/capabilities.go b/v1/providers/sfcompute/capabilities.go index ac0604a..cad2ca7 100644 --- a/v1/providers/sfcompute/capabilities.go +++ b/v1/providers/sfcompute/capabilities.go @@ -11,7 +11,6 @@ func getSFCCapabilities() v1.Capabilities { 
v1.CapabilityCreateInstance, v1.CapabilityTerminateInstance, v1.CapabilityCreateTerminateInstance, - // add others supported by your provider: reboot, stop/start, machine-image, tags, resize-volume, modify-firewall, etc. } } diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index a1418bf..a6dec95 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -17,37 +17,9 @@ import ( const ( maxPricePerNodeHour = 1600 defaultPort = 2222 + defaultSSHUsername = "root" ) -// define function to convert string to b64 -func toBase64(s string) string { - return base64.StdEncoding.EncodeToString([]byte(s)) -} - -// define function to add ssh key to cloud init -func sshKeyCloudInit(sshKey string) string { - return toBase64(fmt.Sprintf("#cloud-config\nssh_authorized_keys:\n - %s", sshKey)) -} - -func mapSFCStatus(s string) v1.LifecycleStatus { - switch strings.ToLower(s) { - case "pending", "nodefailure", "unspecified", "awaitingcapacity", "unknown", "failed": - return v1.LifecycleStatusPending - case "running": - return v1.LifecycleStatusRunning - // case "stopping": - // return v1.LifecycleStatusStopping - case "stopped": - return v1.LifecycleStatusStopped - case "terminating", "released": - return v1.LifecycleStatusTerminating - case "destroyed", "deleted": - return v1.LifecycleStatusTerminated - default: - return v1.LifecycleStatusPending - } -} - func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { // Get the zone for the location (do not include unavailable zones) zone, err := c.getZone(ctx, attrs.Location, false) @@ -85,6 +57,11 @@ func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceA return instance, nil } +func sshKeyCloudInit(sshKey string) string { + script := fmt.Sprintf("#cloud-config\nssh_authorized_keys:\n - %s", sshKey) + return base64.StdEncoding.EncodeToString([]byte(script)) +} + func (c *SFCClient) 
GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { // Get the node from the API node, err := c.client.Nodes.Get(ctx, string(id)) @@ -257,7 +234,7 @@ func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node id: node.ID, name: node.Name, createdAt: time.Unix(node.CreatedAt, 0), - status: mapSFCStatus(fmt.Sprint(node.Status)), + status: sfcStatusToLifecycleStatus(fmt.Sprint(node.Status)), gpuType: string(node.GPUType), sshUsername: sshUsername, sshHostname: sshHostname, @@ -287,7 +264,7 @@ func (c *SFCClient) sfcNodeInfoFromNodeListResponseData(ctx context.Context, nod id: node.ID, name: node.Name, createdAt: time.Unix(node.CreatedAt, 0), - status: mapSFCStatus(fmt.Sprint(node.Status)), + status: sfcStatusToLifecycleStatus(fmt.Sprint(node.Status)), gpuType: string(node.GPUType), sshUsername: sshUsername, sshHostname: sshHostname, @@ -295,6 +272,23 @@ func (c *SFCClient) sfcNodeInfoFromNodeListResponseData(ctx context.Context, nod }, nil } +func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { + switch strings.ToLower(status) { + case "pending", "nodefailure", "unspecified", "awaitingcapacity", "unknown", "failed": + return v1.LifecycleStatusPending + case "running": + return v1.LifecycleStatusRunning + case "stopped": + return v1.LifecycleStatusStopped + case "terminating", "released": + return v1.LifecycleStatusTerminating + case "destroyed", "deleted": + return v1.LifecycleStatusTerminated + default: + return v1.LifecycleStatusPending + } +} + func (c *SFCClient) getSSHDetailsFromVM(ctx context.Context, vmID string, vmStatus string) (string, string, error) { var sshUsername string var sshHostname string @@ -310,7 +304,7 @@ func (c *SFCClient) getSSHDetailsFromVM(ctx context.Context, vmID string, vmStat return "", "", errors.WrapAndTrace(err) } - sshUsername = "ubuntu" // TODO: ?? 
+ sshUsername = defaultSSHUsername sshHostname = sshResponse.SSHHostname return sshUsername, sshHostname, nil From 1cebbaf2678ca5040cbd07c017b3dab690fa0084 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 14:49:10 -0800 Subject: [PATCH 06/17] ssh, lint --- v1/providers/sfcompute/instance.go | 77 +++++++++++++++++++----------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index a6dec95..68baecf 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -17,7 +17,7 @@ import ( const ( maxPricePerNodeHour = 1600 defaultPort = 2222 - defaultSSHUsername = "root" + defaultSSHUsername = "ubuntu" ) func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceAttrs) (*v1.Instance, error) { @@ -37,7 +37,7 @@ func (c *SFCClient) CreateInstance(ctx context.Context, attrs v1.CreateInstanceA MaxPricePerNodeHour: maxPricePerNodeHour, Zone: zone.Name, Names: []string{name}, - CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, // encode ssh key to b64-wrapped cloud-init script + CloudInitUserData: param.Opt[string]{Value: sshKeyCloudInit(attrs.PublicKey)}, }, }) if err != nil { @@ -197,7 +197,7 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error SSHUser: node.sshUsername, SSHPort: defaultPort, CreatedAt: node.createdAt, - DiskSizeBytes: instanceType.SupportedStorage[0].SizeBytes, // TODO: this should be pulled from the node iteself + DiskSizeBytes: instanceType.SupportedStorage[0].SizeBytes, // TODO: this should be pulled from the node itself Status: v1.Status{ LifecycleStatus: node.status, }, @@ -207,7 +207,7 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error Spot: false, Stoppable: false, Rebootable: false, - CloudCredRefID: c.refID, // TODO: this should be pulled from the node iteself + CloudCredRefID: c.refID, // TODO: this should be 
pulled from the node itself } return inst, nil } @@ -216,14 +216,14 @@ func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node var sshUsername string var sshHostname string - if len(node.VMs.Data) == 1 { + if len(node.VMs.Data) == 1 { //nolint:gocritic // ok username, hostname, err := c.getSSHDetailsFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) if err != nil { return nil, errors.WrapAndTrace(err) } sshUsername = username sshHostname = hostname - } else if len(node.VMs.Data) <= 0 { + } else if len(node.VMs.Data) == 0 { sshUsername = "" sshHostname = "" } else { @@ -243,33 +243,52 @@ func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node } func (c *SFCClient) sfcNodeInfoFromNodeListResponseData(ctx context.Context, node *sfcnodes.ListResponseNodeData, zone *sfcnodes.ZoneListResponseData) (*sfcNodeInfo, error) { - var sshUsername string - var sshHostname string + sfcNode := sfcListResponseNodeDataToNode(node) + return c.sfcNodeInfoFromNode(ctx, sfcNode, zone) +} - if len(node.VMs.Data) == 1 { - username, hostname, err := c.getSSHDetailsFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) - if err != nil { - return nil, errors.WrapAndTrace(err) +// Convert the sfcnodes.ListResponseNodeData into a node *sfcnodes.Node -- these are fundamentally the same object, but they +// lack a common interface. One type is returned from a single "get" call, the other is the type of each object returned by +// a "list" call. This conversion function allows the rest of our business logic to treat these as the same type. 
+func sfcListResponseNodeDataToNode(node *sfcnodes.ListResponseNodeData) *sfcnodes.Node { + vms := make([]sfcnodes.NodeVMsData, len(node.VMs.Data)) + for i, vm := range node.VMs.Data { + vms[i] = sfcnodes.NodeVMsData{ //nolint:staticcheck // ok + ID: vm.ID, + CreatedAt: vm.CreatedAt, + EndAt: vm.EndAt, + Object: vm.Object, + StartAt: vm.StartAt, + Status: vm.Status, + UpdatedAt: vm.UpdatedAt, + ImageID: vm.ImageID, + JSON: vm.JSON, } - sshUsername = username - sshHostname = hostname - } else if len(node.VMs.Data) == 0 { - sshUsername = "" - sshHostname = "" - } else { - return nil, errors.WrapAndTrace(fmt.Errorf("multiple VMs found for node %s", node.ID)) } - return &sfcNodeInfo{ - id: node.ID, - name: node.Name, - createdAt: time.Unix(node.CreatedAt, 0), - status: sfcStatusToLifecycleStatus(fmt.Sprint(node.Status)), - gpuType: string(node.GPUType), - sshUsername: sshUsername, - sshHostname: sshHostname, - zone: zone, - }, nil + return &sfcnodes.Node{ + ID: node.ID, + GPUType: node.GPUType, + Name: node.Name, + NodeType: node.NodeType, + Object: node.Object, + Owner: node.Owner, + Status: node.Status, + CreatedAt: node.CreatedAt, + DeletedAt: node.DeletedAt, + EndAt: node.EndAt, + MaxPricePerNodeHour: node.MaxPricePerNodeHour, + ProcurementID: node.ProcurementID, + StartAt: node.StartAt, + UpdatedAt: node.UpdatedAt, + Zone: node.Zone, + JSON: node.JSON, + VMs: sfcnodes.NodeVMs{ + Data: vms, + Object: node.VMs.Object, + JSON: node.VMs.JSON, + }, + } } func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { From 4f5430278f86663fc50a89a3e9394ebb2af54379 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 15:08:30 -0800 Subject: [PATCH 07/17] scripts, cleanup --- v1/providers/sfcompute/instancetype_test.go | 166 ----------------- .../sfcompute/scripts/instancetype_test.go | 169 ++++++++++++++++++ v1/providers/sfcompute/validation_test.go | 6 +- 3 files changed, 172 insertions(+), 169 deletions(-) delete mode 100644 
v1/providers/sfcompute/instancetype_test.go create mode 100644 v1/providers/sfcompute/scripts/instancetype_test.go diff --git a/v1/providers/sfcompute/instancetype_test.go b/v1/providers/sfcompute/instancetype_test.go deleted file mode 100644 index f313029..0000000 --- a/v1/providers/sfcompute/instancetype_test.go +++ /dev/null @@ -1,166 +0,0 @@ -package v1 - -// import ( -// "context" -// "fmt" -// "testing" -// "time" - -// "github.com/brevdev/cloud/internal/ssh" -// v1 "github.com/brevdev/cloud/v1" -// "github.com/google/uuid" -// ) - -// func TestGetInstanceTypes(t *testing.T) { -// t.Parallel() -// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "eu-north1") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// locations, err := client.GetLocations(context.Background(), v1.GetLocationsArgs{ -// IncludeUnavailable: true, -// }) -// if err != nil { -// t.Fatalf("failed to get locations: %v", err) -// } - -// t.Logf("locations: %v", locations) - -// instanceTypes, err := client.GetInstanceTypes(context.Background(), v1.GetInstanceTypeArgs{ -// Locations: v1.LocationsFilter{"all"}, -// }) -// if err != nil { -// t.Fatalf("failed to get instance types: %v", err) -// } - -// t.Logf("instance types: %v", instanceTypes) -// } - -// func TestCreateInstance(t *testing.T) { -// t.Parallel() -// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "eu-north1") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// id := uuid.New().String() - -// instance, err := client.CreateInstance(context.Background(), v1.CreateInstanceAttrs{ -// Name: "test", -// RefID: id, -// PublicKey: ssh.GetTestPublicKey(), -// InstanceType: "h100", -// Location: "hayesvalley", -// }) -// if err != nil { -// 
t.Fatalf("failed to create instance: %v", err) -// } - -// t.Logf("instance: %v", instance) -// } - -// func TestGetInstance(t *testing.T) { -// t.Parallel() -// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") -// if err != nil { -// t.Fatalf("failed to get instance: %v", err) -// } - -// t.Logf("instance: %v", instance) - -// // status -// t.Logf("status: %v", instance.Status) - -// // ssh details -// t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) -// } - -// func TestSSHInstance(t *testing.T) { -// t.Parallel() -// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") -// if err != nil { -// t.Fatalf("failed to get instance: %v", err) -// } - -// t.Logf("instance: %v", instance) - -// // ssh details -// t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) - -// // ssh to instance -// err = ssh.WaitForSSH(context.Background(), ssh.ConnectionConfig{ -// User: "root", -// HostPort: fmt.Sprintf("%s:%d", instance.PublicIP, instance.SSHPort), -// PrivKey: ssh.GetTestPrivateKey(), -// }, ssh.WaitForSSHOptions{ -// Timeout: 10 * time.Second, -// }) -// if err != nil { -// t.Fatalf("failed to wait for SSH: %v", err) -// } - -// t.Logf("SSH connection validated successfully for %s@%s:%d", instance.SSHUser, instance.PublicIP, instance.SSHPort) -// } - -// func TestListInstances(t *testing.T) { -// t.Parallel() 
-// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// instances, err := client.ListInstances(context.Background(), v1.ListInstancesArgs{}) -// if err != nil { -// t.Fatalf("failed to list instances: %v", err) -// } - -// t.Logf("instances: %v", instances) -// } - -// func TestTerminateInstance(t *testing.T) { -// t.Parallel() -// checkSkip(t) -// apiKey := getAPIKey() - -// credential := NewSFCCredential("validation-test", apiKey) -// client, err := credential.MakeClient(context.Background(), "") -// if err != nil { -// t.Fatalf("failed to make client: %v", err) -// } - -// err = client.TerminateInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") -// if err != nil { -// t.Fatalf("failed to terminate instance: %v", err) -// } -// } diff --git a/v1/providers/sfcompute/scripts/instancetype_test.go b/v1/providers/sfcompute/scripts/instancetype_test.go new file mode 100644 index 0000000..ea1c4c5 --- /dev/null +++ b/v1/providers/sfcompute/scripts/instancetype_test.go @@ -0,0 +1,169 @@ +//go:build scripts +// +build scripts + +package scripts + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/brevdev/cloud/internal/ssh" + v1 "github.com/brevdev/cloud/v1" + "github.com/google/uuid" +) + +func TestGetInstanceTypes(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "eu-north1") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + locations, err := client.GetLocations(context.Background(), v1.GetLocationsArgs{ + IncludeUnavailable: true, + }) + if err != nil { + t.Fatalf("failed to get locations: %v", err) + } + + t.Logf("locations: %v", locations) + + instanceTypes, err := 
client.GetInstanceTypes(context.Background(), v1.GetInstanceTypeArgs{ + Locations: v1.LocationsFilter{"all"}, + }) + if err != nil { + t.Fatalf("failed to get instance types: %v", err) + } + + t.Logf("instance types: %v", instanceTypes) +} + +func TestCreateInstance(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "eu-north1") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + id := uuid.New().String() + + instance, err := client.CreateInstance(context.Background(), v1.CreateInstanceAttrs{ + Name: "test", + RefID: id, + PublicKey: ssh.GetTestPublicKey(), + InstanceType: "h100", + Location: "hayesvalley", + }) + if err != nil { + t.Fatalf("failed to create instance: %v", err) + } + + t.Logf("instance: %v", instance) +} + +func TestGetInstance(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") + if err != nil { + t.Fatalf("failed to get instance: %v", err) + } + + t.Logf("instance: %v", instance) + + // status + t.Logf("status: %v", instance.Status) + + // ssh details + t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) +} + +func TestSSHInstance(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + instance, err := client.GetInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") + if err != nil { + t.Fatalf("failed to get 
instance: %v", err) + } + + t.Logf("instance: %v", instance) + + // ssh details + t.Logf("ssh details: %v,%v,%v", instance.SSHUser, instance.SSHPort, instance.PublicIP) + + // ssh to instance + err = ssh.WaitForSSH(context.Background(), ssh.ConnectionConfig{ + User: "root", + HostPort: fmt.Sprintf("%s:%d", instance.PublicIP, instance.SSHPort), + PrivKey: ssh.GetTestPrivateKey(), + }, ssh.WaitForSSHOptions{ + Timeout: 10 * time.Second, + }) + if err != nil { + t.Fatalf("failed to wait for SSH: %v", err) + } + + t.Logf("SSH connection validated successfully for %s@%s:%d", instance.SSHUser, instance.PublicIP, instance.SSHPort) +} + +func TestListInstances(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + instances, err := client.ListInstances(context.Background(), v1.ListInstancesArgs{}) + if err != nil { + t.Fatalf("failed to list instances: %v", err) + } + + t.Logf("instances: %v", instances) +} + +func TestTerminateInstance(t *testing.T) { + t.Parallel() + checkSkip(t) + apiKey := getAPIKey() + + credential := NewSFCCredential("validation-test", apiKey) + client, err := credential.MakeClient(context.Background(), "") + if err != nil { + t.Fatalf("failed to make client: %v", err) + } + + err = client.TerminateInstance(context.Background(), "6c7a3ade-1e59-4e04-af6e-365046995a81_test") + if err != nil { + t.Fatalf("failed to terminate instance: %v", err) + } +} diff --git a/v1/providers/sfcompute/validation_test.go b/v1/providers/sfcompute/validation_test.go index 1dd6adc..196c739 100644 --- a/v1/providers/sfcompute/validation_test.go +++ b/v1/providers/sfcompute/validation_test.go @@ -39,10 +39,10 @@ func TestInstanceLifecycleValidation(t *testing.T) { func checkSkip(t *testing.T) { apiKey := getAPIKey() - isValidation := os.Getenv("VALIDATION_TEST") - 
if apiKey == "" && isValidation != "true" { + isValidationTest := os.Getenv("VALIDATION_TEST") + if apiKey == "" && isValidationTest != "" { t.Fatal("SFCOMPUTE_API_KEY not set, but VALIDATION_TEST is set") - } else if apiKey == "" && isValidation == "false" { + } else if apiKey == "" && isValidationTest == "" { t.Skip("SFCOMPUTE_API_KEY not set, skipping sfcompute validation tests") } } From 28f5db52b860e2a346c26cdbd372e1f2f5fabba7 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 15:43:00 -0800 Subject: [PATCH 08/17] make client with options --- v1/providers/sfcompute/client.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go index 27f16d4..3e9b2c1 100644 --- a/v1/providers/sfcompute/client.go +++ b/v1/providers/sfcompute/client.go @@ -42,10 +42,6 @@ func (c *SFCCredential) GetTenantID() (string, error) { return "", nil } -func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { - return NewSFCClient(c.RefID, c.APIKey).MakeClient(ctx, location) -} - type SFCClient struct { v1.NotImplCloudClient refID string @@ -65,19 +61,24 @@ func WithLogger(logger v1.Logger) SFCClientOption { } } -func NewSFCClient(refID string, apiKey string, opts ...SFCClientOption) *SFCClient { +func (c *SFCCredential) MakeClientWithOptions(ctx context.Context, location string, opts ...SFCClientOption) (v1.CloudClient, error) { sfcClient := &SFCClient{ - refID: refID, - apiKey: apiKey, - client: sfcnodes.NewClient(option.WithBearerToken(apiKey)), - logger: &v1.NoopLogger{}, + refID: c.RefID, + apiKey: c.APIKey, + client: sfcnodes.NewClient(option.WithBearerToken(c.APIKey)), + location: location, + logger: &v1.NoopLogger{}, } for _, opt := range opts { opt(sfcClient) } - return sfcClient + return sfcClient, nil +} + +func (c *SFCCredential) MakeClient(ctx context.Context, location string) (v1.CloudClient, error) { + return 
c.MakeClientWithOptions(ctx, location) } func (c *SFCClient) GetAPIType() v1.APIType { From 6a14f4305f41d4205086091ac1107d696ec6ee10 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 17:41:44 -0800 Subject: [PATCH 09/17] cleanup --- v1/providers/sfcompute/client.go | 2 +- v1/providers/sfcompute/instance.go | 29 ++++++++----------- v1/providers/sfcompute/instancetype.go | 39 +++++++++++++++++++++----- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/v1/providers/sfcompute/client.go b/v1/providers/sfcompute/client.go index 3e9b2c1..7dc2031 100644 --- a/v1/providers/sfcompute/client.go +++ b/v1/providers/sfcompute/client.go @@ -61,7 +61,7 @@ func WithLogger(logger v1.Logger) SFCClientOption { } } -func (c *SFCCredential) MakeClientWithOptions(ctx context.Context, location string, opts ...SFCClientOption) (v1.CloudClient, error) { +func (c *SFCCredential) MakeClientWithOptions(_ context.Context, location string, opts ...SFCClientOption) (v1.CloudClient, error) { sfcClient := &SFCClient{ refID: c.RefID, apiKey: c.APIKey, diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 68baecf..837b2ea 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -185,7 +185,10 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error } // Get the instance type for the zone - instanceType := getInstanceTypeForZone(*node.zone) + instanceType, err := getInstanceTypeForZone(*node.zone) + if err != nil { + return nil, errors.WrapAndTrace(err) + } // Create the instance inst := &v1.Instance{ @@ -213,18 +216,14 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error } func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node, zone *sfcnodes.ZoneListResponseData) (*sfcNodeInfo, error) { - var sshUsername string var sshHostname string - if len(node.VMs.Data) == 1 { //nolint:gocritic // ok - username, hostname, err := 
c.getSSHDetailsFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) + hostname, err := c.getSSHHostnameFromVM(ctx, node.VMs.Data[0].ID, node.VMs.Data[0].Status) if err != nil { return nil, errors.WrapAndTrace(err) } - sshUsername = username sshHostname = hostname } else if len(node.VMs.Data) == 0 { - sshUsername = "" sshHostname = "" } else { return nil, errors.WrapAndTrace(fmt.Errorf("multiple VMs found for node %s", node.ID)) @@ -236,7 +235,7 @@ func (c *SFCClient) sfcNodeInfoFromNode(ctx context.Context, node *sfcnodes.Node createdAt: time.Unix(node.CreatedAt, 0), status: sfcStatusToLifecycleStatus(fmt.Sprint(node.Status)), gpuType: string(node.GPUType), - sshUsername: sshUsername, + sshUsername: defaultSSHUsername, sshHostname: sshHostname, zone: zone, }, nil @@ -308,25 +307,19 @@ func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { } } -func (c *SFCClient) getSSHDetailsFromVM(ctx context.Context, vmID string, vmStatus string) (string, string, error) { - var sshUsername string - var sshHostname string - +func (c *SFCClient) getSSHHostnameFromVM(ctx context.Context, vmID string, vmStatus string) (string, error) { // If the VM is not running, set the SSH username and hostname to empty strings if strings.ToLower(vmStatus) != "running" { - return "", "", nil + return "", nil } // If the VM is running, get the SSH username and hostname sshResponse, err := c.client.VMs.SSH(ctx, sfcnodes.VMSSHParams{VMID: vmID}) if err != nil { - return "", "", errors.WrapAndTrace(err) + return "", errors.WrapAndTrace(err) } - sshUsername = defaultSSHUsername - sshHostname = sshResponse.SSHHostname - - return sshUsername, sshHostname, nil + return sshResponse.SSHHostname, nil } func brevDataToSFCName(refID string, name string) string { @@ -334,7 +327,7 @@ func brevDataToSFCName(refID string, name string) string { } func sfcNameToBrevData(name string) (string, string, error) { - parts := strings.Split(name, "_") + parts := strings.SplitAfterN(name, "_", 2) if 
len(parts) != 2 { return "", "", errors.WrapAndTrace(fmt.Errorf("invalid node name %s", name)) } diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go index 9812dcd..173e550 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "github.com/alecthomas/units" "github.com/bojanz/currency" sfcnodes "github.com/sfcompute/nodes-go" @@ -69,22 +70,44 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp continue } - instanceType := getInstanceTypeForZone(zone) + instanceType, err := getInstanceTypeForZone(zone) + if err != nil { + return nil, err + } - if v1.IsSelectedByArgs(instanceType, args) { - instanceTypes = append(instanceTypes, instanceType) + if v1.IsSelectedByArgs(*instanceType, args) { + instanceTypes = append(instanceTypes, *instanceType) } } return instanceTypes, nil } -func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) v1.InstanceType { +func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) (*v1.InstanceType, error) { gpuType := strings.ToLower(string(zone.HardwareType)) + ramInt64, err := defaultRAMPerNode.ByteCountInUnitInt64(v1.Gibibyte) + if err != nil { + return nil, err + } + ram := units.Base2Bytes(ramInt64 * int64(units.Gibibyte)) + + memoryInt64, err := gpuToVRAM[gpuType].ByteCountInUnitInt64(v1.Gibibyte) + if err != nil { + return nil, err + } + memory := units.Base2Bytes(memoryInt64 * int64(units.Gibibyte)) + + diskSizeInt64, err := defaultStoragePerNode.ByteCountInUnitInt64(v1.Gibibyte) + if err != nil { + return nil, err + } + diskSize := units.Base2Bytes(diskSizeInt64 * int64(units.Gibibyte)) + instanceType := v1.InstanceType{ IsAvailable: true, Type: makeInstanceTypeName(zone), + Memory: ram, MemoryBytes: defaultRAMPerNode, Location: zoneToLocation(zone).Name, Stoppable: false, @@ -95,15 +118,17 @@ func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) 
v1.InstanceType EstimatedDeployTime: &defaultProvisioningTime, SupportedGPUs: []v1.GPU{{ Count: defaultGPUCountPerNode, - Type: gpuType, + Type: strings.ToUpper(gpuType), Manufacturer: v1.GetManufacturer(defaultGPUManufacturer), - Name: gpuType, + Name: strings.ToUpper(gpuType), + Memory: memory, MemoryBytes: gpuToVRAM[gpuType], NetworkDetails: gpuToFormFactor[gpuType], }}, SupportedStorage: []v1.Storage{{ Type: "ssd", Count: 1, + Size: diskSize, SizeBytes: defaultStoragePerNode, }}, SupportedArchitectures: []v1.Architecture{gpuToArchitecture[gpuType]}, @@ -111,7 +136,7 @@ func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) v1.InstanceType instanceType.ID = v1.MakeGenericInstanceTypeID(instanceType) - return instanceType + return &instanceType, nil } func gpuTypeIsAllowed(gpuType string) bool { From ed857ecdbea5a81ca4f94c2bc1dd71a4973aeeb5 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 20:21:56 -0800 Subject: [PATCH 10/17] logs --- v1/providers/sfcompute/instance.go | 43 ++++++++++++++++++- v1/providers/sfcompute/instancetype.go | 25 ++++++++++- .../sfcompute/scripts/instancetype_test.go | 7 ++- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 837b2ea..5d29e26 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -63,6 +63,11 @@ func sshKeyCloudInit(sshKey string) string { } func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstanceID) (*v1.Instance, error) { + c.logger.Debug(ctx, "sfc: GetInstance start", + v1.LogField("instanceID", id), + v1.LogField("location", c.location), + ) + // Get the node from the API node, err := c.client.Nodes.Get(ctx, string(id)) if err != nil { @@ -84,6 +89,12 @@ func (c *SFCClient) GetInstance(ctx context.Context, id v1.CloudProviderInstance if err != nil { return nil, errors.WrapAndTrace(err) } + + c.logger.Debug(ctx, "sfc: GetInstance end", + 
v1.LogField("instanceID", id), + v1.LogField("instance", instance), + ) + return instance, nil } @@ -113,11 +124,20 @@ func (c *SFCClient) getZone(ctx context.Context, location string, includeUnavail } func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs) ([]v1.Instance, error) { + c.logger.Debug(ctx, "sfc: ListInstances start", + v1.LogField("location", c.location), + v1.LogField("args", fmt.Sprintf("%+v", args)), + ) + resp, err := c.client.Nodes.List(ctx, sfcnodes.NodeListParams{}) if err != nil { - return nil, err + return nil, errors.WrapAndTrace(err) } + c.logger.Debug(ctx, "sfc: ListInstances nodes list", + v1.LogField("node count", len(resp.Data)), + ) + zoneCache := make(map[string]*sfcnodes.ZoneListResponseData) var instances []v1.Instance @@ -135,11 +155,19 @@ func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs // Filter by locations if args.Locations != nil && !args.Locations.IsAllowed(zone.Name) { + c.logger.Debug(ctx, "sfc: ListInstances node filtered out by location", + v1.LogField("nodeID", node.ID), + v1.LogField("location", zone.Name), + ) continue } // Filter by instance IDs if args.InstanceIDs != nil && !slices.Contains(args.InstanceIDs, v1.CloudProviderInstanceID(node.ID)) { + c.logger.Debug(ctx, "sfc: ListInstances node filtered out by instance ID", + v1.LogField("nodeID", node.ID), + v1.LogField("instanceID", v1.CloudProviderInstanceID(node.ID)), + ) continue } @@ -155,14 +183,27 @@ func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs instances = append(instances, *inst) } + c.logger.Debug(ctx, "sfc: ListInstances end", + v1.LogField("instance count", len(instances)), + ) + return instances, nil } func (c *SFCClient) TerminateInstance(ctx context.Context, id v1.CloudProviderInstanceID) error { + c.logger.Debug(ctx, "sfc: TerminateInstance start", + v1.LogField("instanceID", id), + ) + _, err := c.client.Nodes.Release(ctx, string(id)) if err != nil { return 
errors.WrapAndTrace(err) } + + c.logger.Debug(ctx, "sfc: TerminateInstance end", + v1.LogField("instanceID", id), + ) + return nil } diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go index 173e550..cb85108 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -55,6 +55,11 @@ func makeDefaultInstanceTypePrice(amount string, currencyCode string) currency.A } func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTypeArgs) ([]v1.InstanceType, error) { + c.logger.Debug(ctx, "sfc: GetInstanceTypes start", + v1.LogField("location", c.location), + v1.LogField("args", fmt.Sprintf("%+v", args)), + ) + // Fetch all available zones includeUnavailable := false zones, err := c.getZones(ctx, includeUnavailable) @@ -62,11 +67,18 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp return nil, err } + c.logger.Debug(ctx, "sfc: GetInstanceTypes zones list", + v1.LogField("zone count", len(zones)), + ) + instanceTypes := make([]v1.InstanceType, 0, len(zones)) for _, zone := range zones { gpuType := strings.ToLower(string(zone.HardwareType)) if !gpuTypeIsAllowed(gpuType) { + c.logger.Debug(ctx, "sfc: GetInstanceTypes gpu type not allowed", + v1.LogField("gpuType", gpuType), + ) continue } @@ -75,11 +87,20 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp return nil, err } - if v1.IsSelectedByArgs(*instanceType, args) { - instanceTypes = append(instanceTypes, *instanceType) + if !v1.IsSelectedByArgs(*instanceType, args) { + c.logger.Debug(ctx, "sfc: GetInstanceTypes instance type not selected by args", + v1.LogField("instanceType", instanceType.Type), + ) + continue } + + instanceTypes = append(instanceTypes, *instanceType) } + c.logger.Debug(ctx, "sfc: GetInstanceTypes end", + v1.LogField("instanceType count", len(instanceTypes)), + ) + return instanceTypes, nil } diff --git 
a/v1/providers/sfcompute/scripts/instancetype_test.go b/v1/providers/sfcompute/scripts/instancetype_test.go index ea1c4c5..d320b3a 100644 --- a/v1/providers/sfcompute/scripts/instancetype_test.go +++ b/v1/providers/sfcompute/scripts/instancetype_test.go @@ -143,7 +143,12 @@ func TestListInstances(t *testing.T) { t.Fatalf("failed to make client: %v", err) } - instances, err := client.ListInstances(context.Background(), v1.ListInstancesArgs{}) + instances, err := client.ListInstances(context.Background(), v1.ListInstancesArgs{ + TagFilters: map[string][]string{ + "dev-plane-managedBy": {"dev-plane"}, + }, + Locations: v1.All, + }) if err != nil { t.Fatalf("failed to list instances: %v", err) } From b2ae954f11583b70c1f03412aea8e6cbf94603a6 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 20:37:48 -0800 Subject: [PATCH 11/17] bug fix --- v1/providers/sfcompute/instance.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 5d29e26..78cd116 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -163,7 +163,7 @@ func (c *SFCClient) ListInstances(ctx context.Context, args v1.ListInstancesArgs } // Filter by instance IDs - if args.InstanceIDs != nil && !slices.Contains(args.InstanceIDs, v1.CloudProviderInstanceID(node.ID)) { + if len(args.InstanceIDs) > 0 && !slices.Contains(args.InstanceIDs, v1.CloudProviderInstanceID(node.ID)) { c.logger.Debug(ctx, "sfc: ListInstances node filtered out by instance ID", v1.LogField("nodeID", node.ID), v1.LogField("instanceID", v1.CloudProviderInstanceID(node.ID)), From 4f6c85cca22960bd9ba73a77d16206471ef0421d Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 20:56:44 -0800 Subject: [PATCH 12/17] bug fix --- v1/instance_validation.go | 29 +++++++++++++++++++++++++++++ v1/providers/sfcompute/instance.go | 4 ++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git 
a/v1/instance_validation.go b/v1/instance_validation.go index b2f16ec..4ecfccf 100644 --- a/v1/instance_validation.go +++ b/v1/instance_validation.go @@ -58,6 +58,7 @@ func ValidateCreateInstance(ctx context.Context, client CloudCreateTerminateInst } func ValidateListCreatedInstance(ctx context.Context, client CloudCreateTerminateInstance, i *Instance) error { + // List instances by location and search for the instance by CloudID ins, err := client.ListInstances(ctx, ListInstancesArgs{ Locations: []string{i.Location}, }) @@ -71,6 +72,34 @@ func ValidateListCreatedInstance(ctx context.Context, client CloudCreateTerminat foundInstance := collections.Find(ins, func(inst Instance) bool { return inst.CloudID == i.CloudID }) + validationErr = validateInstance(i, foundInstance) + if validationErr != nil { + return validationErr + } + + // List instances by instance ID and search for the instance by CloudID + ins, err = client.ListInstances(ctx, ListInstancesArgs{ + InstanceIDs: []CloudProviderInstanceID{i.CloudID}, + }) + if err != nil { + return err + } + if len(ins) == 0 { + validationErr = errors.Join(validationErr, fmt.Errorf("instance not found: %s", i.CloudID)) + } + foundInstance = collections.Find(ins, func(inst Instance) bool { + return inst.CloudID == i.CloudID + }) + validationErr = validateInstance(i, foundInstance) + if validationErr != nil { + return validationErr + } + + return nil +} + +func validateInstance(i *Instance, foundInstance *Instance) error { + var validationErr error if foundInstance == nil { validationErr = errors.Join(validationErr, fmt.Errorf("instance not found: %s", i.CloudID)) return validationErr diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 78cd116..60a5000 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -339,9 +339,9 @@ func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { return v1.LifecycleStatusRunning case "stopped": return 
v1.LifecycleStatusStopped - case "terminating", "released": + case "terminating": return v1.LifecycleStatusTerminating - case "destroyed", "deleted": + case "released", "destroyed", "deleted": return v1.LifecycleStatusTerminated default: return v1.LifecycleStatusPending From 6e69e7782837df17aa5dd66d945ad74caa5539ca Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 21:03:10 -0800 Subject: [PATCH 13/17] bug fix --- v1/instance_validation.go | 19 +++++++++---------- v1/providers/sfcompute/instance.go | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/v1/instance_validation.go b/v1/instance_validation.go index 4ecfccf..5b06f4e 100644 --- a/v1/instance_validation.go +++ b/v1/instance_validation.go @@ -65,16 +65,15 @@ func ValidateListCreatedInstance(ctx context.Context, client CloudCreateTerminat if err != nil { return err } - var validationErr error if len(ins) == 0 { - validationErr = errors.Join(validationErr, fmt.Errorf("no instances found")) + return fmt.Errorf("no instances found") } foundInstance := collections.Find(ins, func(inst Instance) bool { return inst.CloudID == i.CloudID }) - validationErr = validateInstance(i, foundInstance) - if validationErr != nil { - return validationErr + err = validateInstance(i, foundInstance) + if err != nil { + return err } // List instances by instance ID and search for the instance by CloudID @@ -85,16 +84,16 @@ func ValidateListCreatedInstance(ctx context.Context, client CloudCreateTerminat return err } if len(ins) == 0 { - validationErr = errors.Join(validationErr, fmt.Errorf("instance not found: %s", i.CloudID)) + return fmt.Errorf("instance not found: %s", i.CloudID) } + foundInstance = collections.Find(ins, func(inst Instance) bool { return inst.CloudID == i.CloudID }) - validationErr = validateInstance(i, foundInstance) - if validationErr != nil { - return validationErr + err = validateInstance(i, foundInstance) + if err != nil { + return err } - return nil } diff --git 
a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 60a5000..9d09856 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -368,7 +368,7 @@ func brevDataToSFCName(refID string, name string) string { } func sfcNameToBrevData(name string) (string, string, error) { - parts := strings.SplitAfterN(name, "_", 2) + parts := strings.SplitN(name, "_", 2) if len(parts) != 2 { return "", "", errors.WrapAndTrace(fmt.Errorf("invalid node name %s", name)) } From 5a5c50abff9e8230099d1ccdb61a41d91f2b9dbd Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 5 Feb 2026 21:38:46 -0800 Subject: [PATCH 14/17] specify instance disk using legacy units --- v1/providers/sfcompute/instance.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index 9d09856..f82de35 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/alecthomas/units" "github.com/brevdev/cloud/internal/errors" v1 "github.com/brevdev/cloud/v1" sfcnodes "github.com/sfcompute/nodes-go" @@ -231,6 +232,12 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error return nil, errors.WrapAndTrace(err) } + diskSizeInt64, err := instanceType.SupportedStorage[0].SizeBytes.ByteCountInUnitInt64(v1.Gibibyte) + if err != nil { + return nil, err + } + diskSize := units.Base2Bytes(diskSizeInt64 * int64(units.Gibibyte)) + // Create the instance inst := &v1.Instance{ Name: name, @@ -241,6 +248,7 @@ func (c *SFCClient) sfcNodeToBrevInstance(node sfcNodeInfo) (*v1.Instance, error SSHUser: node.sshUsername, SSHPort: defaultPort, CreatedAt: node.createdAt, + DiskSize: diskSize, DiskSizeBytes: instanceType.SupportedStorage[0].SizeBytes, // TODO: this should be pulled from the node itself Status: v1.Status{ LifecycleStatus: node.status, From 0aa7960887b67b9e70c9bcd116d418134abe80f2 Mon Sep 
17 00:00:00 2001 From: Drew Malin Date: Fri, 6 Feb 2026 08:36:33 -0800 Subject: [PATCH 15/17] cleanup --- v1/providers/sfcompute/instancetype.go | 109 +++++++++++++++++-------- 1 file changed, 73 insertions(+), 36 deletions(-) diff --git a/v1/providers/sfcompute/instancetype.go b/v1/providers/sfcompute/instancetype.go index cb85108..412e148 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -20,31 +20,11 @@ const ( deliveryTypeVM = "VM" interconnectInfiniband = "infiniband" + formFactorSXM5 = "sxm5" + diskTypeSSD = "ssd" ) -var ( - allowedZones = []string{"hayesvalley", "yerba"} - - gpuToVRAM = map[string]v1.Bytes{ - gpuTypeH100: v1.NewBytes(80, v1.Gigabyte), - gpuTypeH200: v1.NewBytes(141, v1.Gigabyte), - } - gpuToFormFactor = map[string]string{ - gpuTypeH100: "sxm5", - gpuTypeH200: "sxm5", - } - gpuToArchitecture = map[string]v1.Architecture{ - gpuTypeH100: v1.ArchitectureX86_64, - gpuTypeH200: v1.ArchitectureX86_64, - } - - defaultGPUCountPerNode = int32(8) - defaultGPUManufacturer = "nvidia" - defaultRAMPerNode = v1.NewBytes(960, v1.Gigabyte) - defaultStoragePerNode = v1.NewBytes(1500, v1.Gigabyte) - defaultProvisioningTime = 5 * time.Minute - defaultPricePerGPU = makeDefaultInstanceTypePrice("2.00", "USD") -) +var allowedZones = []string{"hayesvalley", "yerba"} func makeDefaultInstanceTypePrice(amount string, currencyCode string) currency.Amount { instanceTypePrice, err := currency.NewAmount(amount, currencyCode) @@ -107,19 +87,24 @@ func (c *SFCClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTyp func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) (*v1.InstanceType, error) { gpuType := strings.ToLower(string(zone.HardwareType)) - ramInt64, err := defaultRAMPerNode.ByteCountInUnitInt64(v1.Gibibyte) + gpuMetadata, err := getInstanceTypeMetadata(gpuType) + if err != nil { + return nil, err + } + + ramInt64, err := gpuMetadata.memoryBytes.ByteCountInUnitInt64(v1.Gibibyte) if err != nil { 
return nil, err } ram := units.Base2Bytes(ramInt64 * int64(units.Gibibyte)) - memoryInt64, err := gpuToVRAM[gpuType].ByteCountInUnitInt64(v1.Gibibyte) + memoryInt64, err := gpuMetadata.gpuVRAM.ByteCountInUnitInt64(v1.Gibibyte) if err != nil { return nil, err } memory := units.Base2Bytes(memoryInt64 * int64(units.Gibibyte)) - diskSizeInt64, err := defaultStoragePerNode.ByteCountInUnitInt64(v1.Gibibyte) + diskSizeInt64, err := gpuMetadata.diskBytes.ByteCountInUnitInt64(v1.Gibibyte) if err != nil { return nil, err } @@ -129,30 +114,30 @@ func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) (*v1.InstanceTyp IsAvailable: true, Type: makeInstanceTypeName(zone), Memory: ram, - MemoryBytes: defaultRAMPerNode, + MemoryBytes: gpuMetadata.memoryBytes, Location: zoneToLocation(zone).Name, Stoppable: false, Rebootable: false, IsContainer: false, Provider: CloudProviderID, - BasePrice: &defaultPricePerGPU, - EstimatedDeployTime: &defaultProvisioningTime, + BasePrice: &gpuMetadata.pricePerGPU, + EstimatedDeployTime: &gpuMetadata.provisioningTime, SupportedGPUs: []v1.GPU{{ - Count: defaultGPUCountPerNode, + Count: gpuMetadata.gpuCount, Type: strings.ToUpper(gpuType), - Manufacturer: v1.GetManufacturer(defaultGPUManufacturer), + Manufacturer: gpuMetadata.gpuManufacturer, Name: strings.ToUpper(gpuType), Memory: memory, - MemoryBytes: gpuToVRAM[gpuType], - NetworkDetails: gpuToFormFactor[gpuType], + MemoryBytes: gpuMetadata.gpuVRAM, + NetworkDetails: gpuMetadata.formFactor, }}, SupportedStorage: []v1.Storage{{ - Type: "ssd", + Type: diskTypeSSD, Count: 1, Size: diskSize, - SizeBytes: defaultStoragePerNode, + SizeBytes: gpuMetadata.diskBytes, }}, - SupportedArchitectures: []v1.Architecture{gpuToArchitecture[gpuType]}, + SupportedArchitectures: []v1.Architecture{gpuMetadata.architecture}, } instanceType.ID = v1.MakeGenericInstanceTypeID(instanceType) @@ -230,3 +215,55 @@ func zoneToLocation(zone sfcnodes.ZoneListResponseData) v1.Location { Available: true, } } + +// 
sfcInstanceTypeMetadata is a struct that contains the metadata for a given instance type. +// These values are not currently provided by the SFCompute API, so we need to hardcode them. +type sfcInstanceTypeMetadata struct { + gpuType string + formFactor string + architecture v1.Architecture + memoryBytes v1.Bytes + diskBytes v1.Bytes + gpuCount int32 + gpuManufacturer v1.Manufacturer + gpuVRAM v1.Bytes + provisioningTime time.Duration + pricePerGPU currency.Amount +} + +func getInstanceTypeMetadata(gpuType string) (*sfcInstanceTypeMetadata, error) { + switch gpuType { + case gpuTypeH100: + return &h100InstanceTypeMetadata, nil + case gpuTypeH200: + return &h200InstanceTypeMetadata, nil + default: + return nil, fmt.Errorf("invalid GPU type: %s", gpuType) + } +} + +var h100InstanceTypeMetadata = sfcInstanceTypeMetadata{ + gpuType: gpuTypeH100, + formFactor: formFactorSXM5, + architecture: v1.ArchitectureX86_64, + memoryBytes: v1.NewBytes(960, v1.Gigabyte), + diskBytes: v1.NewBytes(1500, v1.Gigabyte), + gpuCount: 8, + gpuManufacturer: v1.ManufacturerNVIDIA, + gpuVRAM: v1.NewBytes(80, v1.Gigabyte), + provisioningTime: 5 * time.Minute, + pricePerGPU: makeDefaultInstanceTypePrice("2.00", "USD"), +} + +var h200InstanceTypeMetadata = sfcInstanceTypeMetadata{ + gpuType: gpuTypeH200, + formFactor: formFactorSXM5, + architecture: v1.ArchitectureX86_64, + memoryBytes: v1.NewBytes(960, v1.Gigabyte), + diskBytes: v1.NewBytes(1500, v1.Gigabyte), + gpuCount: 8, + gpuManufacturer: v1.ManufacturerNVIDIA, + gpuVRAM: v1.NewBytes(141, v1.Gigabyte), + provisioningTime: 5 * time.Minute, + pricePerGPU: makeDefaultInstanceTypePrice("2.00", "USD"), +} From 29104ff08aebb4a244b6dcc8828589f7a27549ba Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Fri, 6 Feb 2026 09:14:27 -0800 Subject: [PATCH 16/17] cleanup --- v1/providers/sfcompute/instancetype.go | 64 +++++++++++++------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/v1/providers/sfcompute/instancetype.go 
b/v1/providers/sfcompute/instancetype.go index 412e148..6858e93 100644 --- a/v1/providers/sfcompute/instancetype.go +++ b/v1/providers/sfcompute/instancetype.go @@ -120,8 +120,8 @@ func getInstanceTypeForZone(zone sfcnodes.ZoneListResponseData) (*v1.InstanceTyp Rebootable: false, IsContainer: false, Provider: CloudProviderID, - BasePrice: &gpuMetadata.pricePerGPU, - EstimatedDeployTime: &gpuMetadata.provisioningTime, + BasePrice: &gpuMetadata.price, + EstimatedDeployTime: &gpuMetadata.estimatedDeployTime, SupportedGPUs: []v1.GPU{{ Count: gpuMetadata.gpuCount, Type: strings.ToUpper(gpuType), @@ -219,16 +219,16 @@ func zoneToLocation(zone sfcnodes.ZoneListResponseData) v1.Location { // sfcInstanceTypeMetadata is a struct that contains the metadata for a given instance type. // These values are not currently provided by the SFCompute API, so we need to hardcode them. type sfcInstanceTypeMetadata struct { - gpuType string - formFactor string - architecture v1.Architecture - memoryBytes v1.Bytes - diskBytes v1.Bytes - gpuCount int32 - gpuManufacturer v1.Manufacturer - gpuVRAM v1.Bytes - provisioningTime time.Duration - pricePerGPU currency.Amount + gpuType string + formFactor string + architecture v1.Architecture + memoryBytes v1.Bytes + diskBytes v1.Bytes + gpuCount int32 + gpuManufacturer v1.Manufacturer + gpuVRAM v1.Bytes + estimatedDeployTime time.Duration + price currency.Amount } func getInstanceTypeMetadata(gpuType string) (*sfcInstanceTypeMetadata, error) { @@ -243,27 +243,27 @@ func getInstanceTypeMetadata(gpuType string) (*sfcInstanceTypeMetadata, error) { } var h100InstanceTypeMetadata = sfcInstanceTypeMetadata{ - gpuType: gpuTypeH100, - formFactor: formFactorSXM5, - architecture: v1.ArchitectureX86_64, - memoryBytes: v1.NewBytes(960, v1.Gigabyte), - diskBytes: v1.NewBytes(1500, v1.Gigabyte), - gpuCount: 8, - gpuManufacturer: v1.ManufacturerNVIDIA, - gpuVRAM: v1.NewBytes(80, v1.Gigabyte), - provisioningTime: 5 * time.Minute, - pricePerGPU: 
makeDefaultInstanceTypePrice("2.00", "USD"), + gpuType: gpuTypeH100, + formFactor: formFactorSXM5, + architecture: v1.ArchitectureX86_64, + memoryBytes: v1.NewBytes(960, v1.Gigabyte), + diskBytes: v1.NewBytes(1500, v1.Gigabyte), + gpuCount: 8, + gpuManufacturer: v1.ManufacturerNVIDIA, + gpuVRAM: v1.NewBytes(80, v1.Gigabyte), + estimatedDeployTime: 14 * time.Minute, + price: makeDefaultInstanceTypePrice("16.00", "USD"), } var h200InstanceTypeMetadata = sfcInstanceTypeMetadata{ - gpuType: gpuTypeH200, - formFactor: formFactorSXM5, - architecture: v1.ArchitectureX86_64, - memoryBytes: v1.NewBytes(960, v1.Gigabyte), - diskBytes: v1.NewBytes(1500, v1.Gigabyte), - gpuCount: 8, - gpuManufacturer: v1.ManufacturerNVIDIA, - gpuVRAM: v1.NewBytes(141, v1.Gigabyte), - provisioningTime: 5 * time.Minute, - pricePerGPU: makeDefaultInstanceTypePrice("2.00", "USD"), + gpuType: gpuTypeH200, + formFactor: formFactorSXM5, + architecture: v1.ArchitectureX86_64, + memoryBytes: v1.NewBytes(960, v1.Gigabyte), + diskBytes: v1.NewBytes(1500, v1.Gigabyte), + gpuCount: 8, + gpuManufacturer: v1.ManufacturerNVIDIA, + gpuVRAM: v1.NewBytes(141, v1.Gigabyte), + estimatedDeployTime: 14 * time.Minute, + price: makeDefaultInstanceTypePrice("24.00", "USD"), } From 6654b02de052473713a45b3b4e388a5e2f8aa96b Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Fri, 6 Feb 2026 12:31:19 -0800 Subject: [PATCH 17/17] feedback --- v1/providers/sfcompute/instance.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/v1/providers/sfcompute/instance.go b/v1/providers/sfcompute/instance.go index f82de35..77e282f 100644 --- a/v1/providers/sfcompute/instance.go +++ b/v1/providers/sfcompute/instance.go @@ -341,7 +341,7 @@ func sfcListResponseNodeDataToNode(node *sfcnodes.ListResponseNodeData) *sfcnode func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { switch strings.ToLower(status) { - case "pending", "nodefailure", "unspecified", "awaitingcapacity", "unknown", "failed": + case "pending", 
"unspecified", "awaitingcapacity", "unknown": return v1.LifecycleStatusPending case "running": return v1.LifecycleStatusRunning @@ -351,6 +351,8 @@ func sfcStatusToLifecycleStatus(status string) v1.LifecycleStatus { return v1.LifecycleStatusTerminating case "released", "destroyed", "deleted": return v1.LifecycleStatusTerminated + case "nodefailure", "failed": + return v1.LifecycleStatusFailed default: return v1.LifecycleStatusPending }