diff --git a/.github/workflows/build-artifacts.yml b/.github/workflows/build-artifacts.yml index f5eafe5c1e..c8b68db670 100644 --- a/.github/workflows/build-artifacts.yml +++ b/.github/workflows/build-artifacts.yml @@ -114,9 +114,9 @@ jobs: - name: Check if go.mod and go.sum are up-to-date run: go mod tidy -diff - name: Run golangci-lint - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@v9 with: - version: v1.62.0 # Should match .pre-commit-config.yaml + version: v2.6.2 # Should match .pre-commit-config.yaml args: --timeout=20m working-directory: runner - name: Test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2161a16081..b3475efda5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,11 +7,11 @@ repos: args: ['--fix'] - id: ruff-format - repo: https://github.com/golangci/golangci-lint - rev: v1.62.0 # Should match .github/workflows/build-artifacts.yml + rev: v2.6.2 # Should match .github/workflows/build-artifacts.yml hooks: - id: golangci-lint-full - language_version: 1.23.8 # Should match runner/go.mod - entry: bash -c 'cd runner && golangci-lint run' + language_version: 1.25.0 # Should match runner/go.mod + entry: bash -c 'cd runner && golangci-lint run --fix' stages: [manual] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 diff --git a/runner/.golangci.yml b/runner/.golangci.yml index 435ecce783..d96d2595e9 100644 --- a/runner/.golangci.yml +++ b/runner/.golangci.yml @@ -1,320 +1,302 @@ +version: "2" + run: # default concurrency is a available CPU number concurrency: 4 - # timeout for analysis, e.g. 30s, 5m, default is 1m timeout: 1m - # exit code when at least one issue was found, default is 1 issues-exit-code: 1 - # include test files or not, default is true tests: false -# output configuration options -output: - # colored-line-number|line-number|json|tab|checkstyle|code-climate|junit-xml|github-actions - # default is "colored-line-number" - formats: - - format: colored-line-number - -# all available settings of specific linters -linters-settings: - - cyclop: - # the maximal code complexity to report - max-complexity: 10 - # the maximal average package complexity. If it's higher than 0.0 (float) the check is enabled (default 0.0) - package-average: 0.0 - # should ignore tests (default false) - skip-tests: false - - errcheck: - # report about not checking of errors in type assertions: `a := b.(MyStruct)`; - # default is false: such cases aren't reported by default. - check-type-assertions: false - - # report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`; - # default is false: such cases aren't reported by default. - check-blank: false - - errorlint: - # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats - errorf: true - # Check for plain type assertions and type switches - asserts: true - # Check for plain error comparisons - comparison: true - - exhaustive: - # check switch statements in generated files also - check-generated: false - # indicates that switch statements are to be considered exhaustive if a - # 'default' case is present, even if all enum members aren't listed in the - # switch - default-signifies-exhaustive: false - - funlen: - lines: 60 - statements: 40 - - gocognit: - # minimal code complexity to report, 30 by default (but we recommend 10-20) - min-complexity: 10 - - nestif: - # minimal complexity of if statements to report, 5 by default - min-complexity: 4 - - goconst: - # minimal length of string constant, 3 by default - min-len: 3 - # minimal occurrences count to trigger, 3 by default - min-occurrences: 3 - - gocritic: - # Which checks should be enabled; can't be combined with 'disabled-checks'; - # See https://go-critic.github.io/overview#checks-overview - # To check which checks are enabled run `GL_DEBUG=gocritic golangci-lint run` - # By default list of stable checks is used. - enabled-checks: - - rangeValCopy - - # Which checks should be disabled; can't be combined with 'enabled-checks'; default is empty - disabled-checks: - - regexpMust - - # Enable multiple checks by tags, run `GL_DEBUG=gocritic golangci-lint run` to see all tags and checks. - # Empty list by default. See https://github.com/go-critic/go-critic#usage -> section "Tags". - enabled-tags: - - performance - disabled-tags: - - experimental - - # Settings passed to gocritic. - # The settings key is the name of a supported gocritic checker. - # The list of supported checkers can be find in https://go-critic.github.io/overview. - settings: - captLocal: # must be valid enabled check name - # whether to restrict checker to params only (default true) - paramsOnly: true - elseif: - # whether to skip balanced if-else pairs (default true) - skipBalanced: true - hugeParam: - # size in bytes that makes the warning trigger (default 80) - sizeThreshold: 80 - nestingReduce: - # min number of statements inside a branch to trigger a warning (default 5) - bodyWidth: 5 - rangeExprCopy: - # size in bytes that makes the warning trigger (default 512) - sizeThreshold: 512 - # whether to check test functions (default true) - skipTestFuncs: true - rangeValCopy: - # size in bytes that makes the warning trigger (default 128) - sizeThreshold: 32 - # whether to check test functions (default true) - skipTestFuncs: true - truncateCmp: - # whether to skip int/uint/uintptr types (default true) - skipArchDependent: true - underef: - # whether to skip (*x).method() calls where x is a pointer receiver (default true) - skipRecvDeref: true - unnamedResult: - # whether to check exported functions - checkExported: true - - gocyclo: - # minimal code complexity to report, 30 by default (but we recommend 10-20) - min-complexity: 10 - - gofmt: - # simplify code: gofmt with `-s` option, true by default - simplify: true - - gofumpt: - # Choose whether or not to use the extra rules that are disabled - # by default - extra-rules: false - - goimports: - # put imports beginning with prefix after 3rd-party packages; - # it's a comma-separated list of prefixes - local-prefixes: github.com/dstackai/dstackai - - gosec: - # To select a subset of rules to run. - # Available rules: https://github.com/securego/gosec#available-rules - includes: - - G401 - - G306 - - G101 - # To specify a set of rules to explicitly exclude. - # Available rules: https://github.com/securego/gosec#available-rules - excludes: - - G204 - # To specify the configuration of rules. - # The configuration of rules is not fully documented by gosec: - # https://github.com/securego/gosec#configuration - # https://github.com/securego/gosec/blob/569328eade2ccbad4ce2d0f21ee158ab5356a5cf/rules/rulelist.go#L60-L102 - config: - G306: "0600" - G101: - pattern: "(?i)example" - ignore_entropy: false - entropy_threshold: "80.0" - per_char_threshold: "3.0" - truncate: "32" - - gosimple: - # https://staticcheck.io/docs/options#checks - checks: [ "all" ] - - govet: - # settings per analyzer - settings: - printf: # analyzer name, run `go tool vet help` to see all analyzers - funcs: # run `go tool vet help printf` to see available settings for `printf` analyzer - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf - - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf - - # enable or disable analyzers by name - # run `go tool vet help` to see all analyzers - enable-all: true - disable: - - shadow - - fieldalignment - - importas: - # if set to `true`, force to use alias. - no-unaliased: true - # List of aliases - alias: - # using `servingv1` alias for `knative.dev/serving/pkg/apis/serving/v1` package - - pkg: knative.dev/serving/pkg/apis/serving/v1 - alias: servingv1 - # using `autoscalingv1alpha1` alias for `knative.dev/serving/pkg/apis/autoscaling/v1alpha1` package - - pkg: knative.dev/serving/pkg/apis/autoscaling/v1alpha1 - alias: autoscalingv1alpha1 - # You can specify the package path by regular expression, - # and alias by regular expression expansion syntax like below. - # see https://github.com/julz/importas#use-regular-expression for details - - pkg: knative.dev/serving/pkg/apis/(\w+)/(v[\w\d]+) - alias: $1$2 - - lll: - # max line length, lines longer will be reported. Default is 120. - # '\t' is counted as 1 character by default, and can be changed with the tab-width option - line-length: 120 - # tab width in spaces. Default to 1. - tab-width: 1 - - staticcheck: - # https://staticcheck.io/docs/options#checks - checks: [ "all" ] - - stylecheck: - # https://staticcheck.io/docs/options#checks - checks: [ "all", "-ST1000", "-ST1003", "-ST1016", "-ST1020", "-ST1021", "-ST1022" ] - # https://staticcheck.io/docs/options#dot_import_whitelist - dot-import-whitelist: - - fmt - # https://staticcheck.io/docs/options#initialisms - initialisms: [ "ACL", "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML", "HTTP", "HTTPS", "ID", "IP", "JSON", "QPS", "RAM", "RPC", "SLA", "SMTP", "SQL", "SSH", "TCP", "TLS", "TTL", "UDP", "UI", "GID", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS" ] - # https://staticcheck.io/docs/options#http_status_code_whitelist - http-status-code-whitelist: [ "200", "400", "404", "500" ] - - tagliatelle: - # check the struck tag name case - case: - # use the struct field name to check the name of the struct tag - use-field-name: true - rules: - # any struct tag type can be used. - # support string case: `camel`, `pascal`, `kebab`, `snake`, `goCamel`, `goPascal`, `goKebab`, `goSnake`, `upper`, `lower` - json: snake - yaml: camel - xml: camel - bson: camel - avro: snake - mapstructure: kebab - - testpackage: - # regexp pattern to skip files - skip-regexp: (export|internal)_test\.go - - thelper: - # The following configurations enable all checks. It can be omitted because all checks are enabled by default. - # You can enable only required checks deleting unnecessary checks. - test: - first: true - name: true - begin: true - benchmark: - first: true - name: true - begin: true - tb: - first: true - name: true - begin: true - - whitespace: - multi-if: false # Enforces newlines (or comments) after every multi-line if statement - multi-func: false # Enforces newlines (or comments) after every multi-line function signatur - linters: - presets: - - bugs + default: none + enable: + # bugs + - asasalint + - asciicheck + - bidichk + - bodyclose + - durationcheck + - errcheck + - errchkjson + - errorlint + - exhaustive + - gocheckcompilerdirectives + - gochecksumtype + - gosec + - gosmopolitan + - govet + - loggercheck + - makezero + - musttag + - nilerr + - nilnesserr + - noctx + - protogetter + - reassign + - recvcheck + - rowserrcheck + - spancheck + - sqlclosecheck + - staticcheck + - testifylint + - zerologlint + # unused + - ineffassign - unused - - import - - module - - format - disable: - - depguard + # module + - gomoddirectives - gomodguard - - unparam - - contextcheck - fast: false - - -issues: - # Excluding configuration per-path, per-linter, per-text and per-source - exclude-rules: - # Exclude some linters from running on tests files. - - path: _test\.go - linters: - - gocyclo - - errcheck - - dupl - - gosec - - # Exclude known linters from partially hard-vendored code, - # which is impossible to exclude via "nolint" comments. - - path: internal/hmac/ - text: "weak cryptographic primitive" - linters: - - gosec - - # Exclude some staticcheck messages - - linters: - - staticcheck - text: "SA9003:" - - # Exclude lll issues for long lines with go:generate - - linters: - - lll - source: "^//go:generate " - - # The list of ids of default excludes to include or disable. By default it's empty. - include: - - EXC0002 # disable excluding of issues about comments from golint - + settings: + cyclop: + # the maximal code complexity to report + max-complexity: 10 + # the maximal average package complexity. If it's higher than 0.0 (float) the check is enabled (default 0.0) + package-average: 0.0 + errcheck: + # report about not checking of errors in type assertions: `a := b.(MyStruct)`; + # default is false: such cases aren't reported by default. + check-type-assertions: false + # report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`; + # default is false: such cases aren't reported by default. + check-blank: false + exclude-functions: + # FIXME: either check for close errors or wrap defer calls into closure with _ = file.Close() + - (*os.File).Close + errorlint: + # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats + errorf: true + # Check for plain type assertions and type switches + asserts: true + # Check for plain error comparisons + comparison: true + exhaustive: + # indicates that switch statements are to be considered exhaustive if a + # 'default' case is present, even if all enum members aren't listed in the + # switch + default-signifies-exhaustive: false + funlen: + lines: 60 + statements: 40 + gocognit: + # minimal code complexity to report, 30 by default (but we recommend 10-20) + min-complexity: 10 + nestif: + # minimal complexity of if statements to report, 5 by default + min-complexity: 4 + goconst: + # minimal length of string constant, 3 by default + min-len: 3 + # minimal occurrences count to trigger, 3 by default + min-occurrences: 3 + gocritic: + # Which checks should be enabled; can't be combined with 'disabled-checks'; + # See https://go-critic.github.io/overview#checks-overview + # To check which checks are enabled run `GL_DEBUG=gocritic golangci-lint run` + # By default list of stable checks is used. + enabled-checks: + - rangeValCopy + # Which checks should be disabled; can't be combined with 'enabled-checks'; default is empty + disabled-checks: + - regexpMust + # Enable multiple checks by tags, run `GL_DEBUG=gocritic golangci-lint run` to see all tags and checks. + # Empty list by default. See https://github.com/go-critic/go-critic#usage -> section "Tags". + enabled-tags: + - performance + disabled-tags: + - experimental + # Settings passed to gocritic. + # The settings key is the name of a supported gocritic checker. + # The list of supported checkers can be find in https://go-critic.github.io/overview. + settings: + captLocal: # must be valid enabled check name + # whether to restrict checker to params only (default true) + paramsOnly: true + elseif: + # whether to skip balanced if-else pairs (default true) + skipBalanced: true + hugeParam: + # size in bytes that makes the warning trigger (default 80) + sizeThreshold: 80 + nestingReduce: + # min number of statements inside a branch to trigger a warning (default 5) + bodyWidth: 5 + rangeExprCopy: + # size in bytes that makes the warning trigger (default 512) + sizeThreshold: 512 + # whether to check test functions (default true) + skipTestFuncs: true + rangeValCopy: + # size in bytes that makes the warning trigger (default 128) + sizeThreshold: 32 + # whether to check test functions (default true) + skipTestFuncs: true + truncateCmp: + # whether to skip int/uint/uintptr types (default true) + skipArchDependent: true + underef: + # whether to skip (*x).method() calls where x is a pointer receiver (default true) + skipRecvDeref: true + unnamedResult: + # whether to check exported functions + checkExported: true + gocyclo: + # minimal code complexity to report, 30 by default (but we recommend 10-20) + min-complexity: 10 + gosec: + # To select a subset of rules to run. + # Available rules: https://github.com/securego/gosec#available-rules + includes: + - G401 + - G306 + - G101 + # To specify a set of rules to explicitly exclude. + # Available rules: https://github.com/securego/gosec#available-rules + excludes: + - G204 + # To specify the configuration of rules. + # The configuration of rules is not fully documented by gosec: + # https://github.com/securego/gosec#configuration + # https://github.com/securego/gosec/blob/569328eade2ccbad4ce2d0f21ee158ab5356a5cf/rules/rulelist.go#L60-L102 + config: + G306: "0600" + G101: + pattern: "(?i)example" + ignore_entropy: false + entropy_threshold: "80.0" + per_char_threshold: "3.0" + truncate: "32" + govet: + # settings per analyzer + settings: + printf: # analyzer name, run `go tool vet help` to see all analyzers + funcs: # run `go tool vet help printf` to see available settings for `printf` analyzer + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf + - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf + # enable or disable analyzers by name + # run `go tool vet help` to see all analyzers + enable-all: true + disable: + - shadow + - fieldalignment + importas: + # if set to `true`, force to use alias. + no-unaliased: true + # List of aliases + alias: + # using `servingv1` alias for `knative.dev/serving/pkg/apis/serving/v1` package + - pkg: knative.dev/serving/pkg/apis/serving/v1 + alias: servingv1 + # using `autoscalingv1alpha1` alias for `knative.dev/serving/pkg/apis/autoscaling/v1alpha1` package + - pkg: knative.dev/serving/pkg/apis/autoscaling/v1alpha1 + alias: autoscalingv1alpha1 + # You can specify the package path by regular expression, + # and alias by regular expression expansion syntax like below. + # see https://github.com/julz/importas#use-regular-expression for details + - pkg: knative.dev/serving/pkg/apis/(\w+)/(v[\w\d]+) + alias: $1$2 + lll: + # max line length, lines longer will be reported. Default is 120. + # '\t' is counted as 1 character by default, and can be changed with the tab-width option + line-length: 120 + # tab width in spaces. Default to 1. + tab-width: 1 + staticcheck: + # https://staticcheck.io/docs/options#checks + checks: [ "all", "-ST1000", "-ST1003", "-ST1016", "-ST1020", "-ST1021", "-ST1022", "-QF1008" ] + # https://staticcheck.io/docs/options#dot_import_whitelist + dot-import-whitelist: + - fmt + # https://staticcheck.io/docs/options#initialisms + initialisms: [ "ACL", "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML", "HTTP", "HTTPS", "ID", "IP", "JSON", "QPS", "RAM", "RPC", "SLA", "SMTP", "SQL", "SSH", "TCP", "TLS", "TTL", "UDP", "UI", "GID", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS" ] + # https://staticcheck.io/docs/options#http_status_code_whitelist + http-status-code-whitelist: [ "200", "400", "404", "500" ] + tagliatelle: + # check the struck tag name case + case: + # use the struct field name to check the name of the struct tag + use-field-name: true + rules: + # any struct tag type can be used. + # support string case: `camel`, `pascal`, `kebab`, `snake`, `goCamel`, `goPascal`, `goKebab`, `goSnake`, `upper`, `lower` + json: snake + yaml: camel + xml: camel + bson: camel + avro: snake + mapstructure: kebab + testpackage: + # regexp pattern to skip files + skip-regexp: (export|internal)_test\.go + thelper: + # The following configurations enable all checks. It can be omitted because all checks are enabled by default. + # You can enable only required checks deleting unnecessary checks. + test: + first: true + name: true + begin: true + benchmark: + first: true + name: true + begin: true + tb: + first: true + name: true + begin: true + whitespace: + multi-if: false # Enforces newlines (or comments) after every multi-line if statement + multi-func: false # Enforces newlines (or comments) after every multi-line function signatur + exclusions: + # Excluding configuration per-path, per-linter, per-text and per-source + rules: + # Exclude some linters from running on tests files. + - path: _test\.go + linters: + - gocyclo + - errcheck + - dupl + - gosec + # Exclude known linters from partially hard-vendored code, + # which is impossible to exclude via "nolint" comments. + - path: internal/hmac/ + text: "weak cryptographic primitive" + linters: + - gosec + # Exclude lll issues for long lines with go:generate + - linters: + - lll + source: "^//go:generate " + # The list of ids of default excludes to include or disable. By default it's empty. + presets: + - comments # disable excluding of issues about comments from golint + +formatters: + enable: + - gci + - gofmt + - gofumpt + - goimports + settings: + gci: + sections: + - standard + - default + - localmodule + gofmt: + # simplify code: gofmt with `-s` option, true by default + simplify: true + gofumpt: + # Choose whether or not to use the extra rules that are disabled + # by default + extra-rules: false + goimports: + # put imports beginning with prefix after 3rd-party packages; + local-prefixes: + - github.com/dstackai/dstack/runner severity: # Default value is empty string. @@ -325,12 +307,7 @@ severity: # - Code climate: https://docs.codeclimate.com/docs/issues#issue-severity # - Checkstyle: https://checkstyle.sourceforge.io/property_types.html#severity # - Github: https://help.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-error-message - default-severity: error - - # The default value is false. - # If set to true severity-rules regular expressions become case sensitive. - case-sensitive: false - + default: error # Default value is empty list. # When a list of severity rules are provided, severity information will be added to lint # issues. Severity rules have the same filtering capability as exclude rules except you diff --git a/runner/cmd/runner/cmd.go b/runner/cmd/runner/cmd.go index d91b5d8fb1..ed3f400c42 100644 --- a/runner/cmd/runner/cmd.go +++ b/runner/cmd/runner/cmd.go @@ -4,8 +4,9 @@ import ( "log" "os" - "github.com/dstackai/dstack/runner/consts" "github.com/urfave/cli/v2" + + "github.com/dstackai/dstack/runner/consts" ) // Version is a build-time variable. The value is overridden by ldflags. diff --git a/runner/cmd/runner/main.go b/runner/cmd/runner/main.go index 001e805373..fc48233c62 100644 --- a/runner/cmd/runner/main.go +++ b/runner/cmd/runner/main.go @@ -8,10 +8,11 @@ import ( "os" "path/filepath" + "github.com/sirupsen/logrus" + "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/runner/api" - "github.com/sirupsen/logrus" ) func main() { diff --git a/runner/cmd/shim/main.go b/runner/cmd/shim/main.go index 63d459a8cd..b7f52d26a9 100644 --- a/runner/cmd/shim/main.go +++ b/runner/cmd/shim/main.go @@ -11,14 +11,15 @@ import ( "path/filepath" "time" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" + "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/shim" "github.com/dstackai/dstack/runner/internal/shim/api" "github.com/dstackai/dstack/runner/internal/shim/dcgm" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" ) // Version is a build-time variable. The value is overridden by ldflags. diff --git a/runner/go.mod b/runner/go.mod index b72312768e..8c474cc42d 100644 --- a/runner/go.mod +++ b/runner/go.mod @@ -1,6 +1,6 @@ module github.com/dstackai/dstack/runner -go 1.23.8 +go 1.25 require ( github.com/NVIDIA/go-dcgm v0.0.0-20250707210631-823394f2bd9b diff --git a/runner/internal/api/common.go b/runner/internal/api/common.go index 2f2f12d04c..52fa886a0f 100644 --- a/runner/internal/api/common.go +++ b/runner/internal/api/common.go @@ -8,8 +8,9 @@ import ( "net/http" "strings" - "github.com/dstackai/dstack/runner/internal/log" "github.com/golang/gddo/httputil/header" + + "github.com/dstackai/dstack/runner/internal/log" ) type Error struct { diff --git a/runner/internal/connections/connections.go b/runner/internal/connections/connections.go index 74214a4d8f..37aedad7a2 100644 --- a/runner/internal/connections/connections.go +++ b/runner/internal/connections/connections.go @@ -6,8 +6,9 @@ import ( "sync" "time" - "github.com/dstackai/dstack/runner/internal/log" "github.com/prometheus/procfs" + + "github.com/dstackai/dstack/runner/internal/log" ) const connStateEstablished = 1 @@ -112,11 +113,11 @@ func (t *ConnectionTracker) getCurrentConnections() (map[connection]struct{}, er connections := make(map[connection]struct{}) netTCP, err := t.cfg.Procfs.NetTCP() if err != nil { - return nil, fmt.Errorf("Failed to retrieve IPv4 network connections: %w", err) + return nil, fmt.Errorf("failed to retrieve IPv4 network connections: %w", err) } netTCP6, err := t.cfg.Procfs.NetTCP6() if err != nil { - return nil, fmt.Errorf("Failed to retrieve IPv6 network connections: %w", err) + return nil, fmt.Errorf("failed to retrieve IPv6 network connections: %w", err) } for _, conn := range append(netTCP, netTCP6...) { if conn.LocalPort == t.cfg.Port && conn.St == connStateEstablished { diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index 0311358624..7540315a50 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -21,14 +21,15 @@ import ( "github.com/creack/pty" "github.com/dstackai/ansistrip" + "github.com/prometheus/procfs" + "golang.org/x/sys/unix" + "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/connections" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/schemas" "github.com/dstackai/dstack/runner/internal/types" - "github.com/prometheus/procfs" - "golang.org/x/sys/unix" ) // TODO: Tune these parameters for optimal experience/performance @@ -176,7 +177,7 @@ func (ex *RunExecutor) Run(ctx context.Context) (err error) { }() stripper := ansistrip.NewWriter(ex.runnerLogs, AnsiStripFlushInterval, AnsiStripMaxDelay, MaxBufferSize) - defer stripper.Close() + defer func() { _ = stripper.Close() }() logger := io.MultiWriter(runnerLogFile, os.Stdout, stripper) ctx = log.WithLogger(ctx, log.NewEntry(logger, int(log.DefaultEntry.Logger.Level))) // todo loglevel log.Info(ctx, "Run job", "log_level", log.GetLogger(ctx).Logger.Level.String()) @@ -408,7 +409,7 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error } // Call buildLDLibraryPathEnv and update jobEnvs if no error occurs - newLDPath, err := buildLDLibraryPathEnv() + newLDPath, err := buildLDLibraryPathEnv(ctx) if err != nil { log.Info(ctx, "Continuing without updating LD_LIBRARY_PATH") } else { @@ -546,7 +547,7 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error defer func() { _ = cmd.Wait() }() // release resources if copy fails stripper := ansistrip.NewWriter(ex.jobLogs, AnsiStripFlushInterval, AnsiStripMaxDelay, MaxBufferSize) - defer stripper.Close() + defer func() { _ = stripper.Close() }() logger := io.MultiWriter(jobLogFile, ex.jobWsLogs, stripper) _, err = io.Copy(logger, ptm) if err != nil && !isPtyError(err) { @@ -616,9 +617,9 @@ func isPtyError(err error) bool { return errors.As(err, &e) && errors.Is(e.Err, syscall.EIO) } -func buildLDLibraryPathEnv() (string, error) { +func buildLDLibraryPathEnv(ctx context.Context) (string, error) { // Execute shell command to get Python prefix - cmd := exec.Command("bash", "-i", "-c", "python3-config --prefix") + cmd := exec.CommandContext(ctx, "bash", "-i", "-c", "python3-config --prefix") output, err := cmd.Output() if err != nil { return "", fmt.Errorf("error executing command: %w", err) @@ -907,7 +908,7 @@ func includeDstackProfile(profilePath string, dstackProfilePath string) error { return fmt.Errorf("open profile file: %w", err) } defer file.Close() - if _, err = file.WriteString(fmt.Sprintf("\n. '%s'\n", dstackProfilePath)); err != nil { + if _, err = fmt.Fprintf(file, "\n. '%s'\n", dstackProfilePath); err != nil { return fmt.Errorf("write profile include: %w", err) } if err = os.Chmod(profilePath, 0o644); err != nil { diff --git a/runner/internal/executor/executor_test.go b/runner/internal/executor/executor_test.go index 26798b0f75..0e0b14d84e 100644 --- a/runner/internal/executor/executor_test.go +++ b/runner/internal/executor/executor_test.go @@ -247,7 +247,7 @@ func TestWriteDstackProfile(t *testing.T) { for _, value := range testCases { env := map[string]string{"VAR": value} writeDstackProfile(env, path) - cmd := exec.Command("/bin/sh", "-c", script) + cmd := exec.CommandContext(t.Context(), "/bin/sh", "-c", script) out, err := cmd.Output() assert.NoError(t, err) assert.Equal(t, value, string(out)) diff --git a/runner/internal/executor/files.go b/runner/internal/executor/files.go index f3eef883fe..923af1006b 100644 --- a/runner/internal/executor/files.go +++ b/runner/internal/executor/files.go @@ -11,6 +11,7 @@ import ( "regexp" "github.com/codeclysm/extract/v4" + "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/log" ) diff --git a/runner/internal/executor/repo.go b/runner/internal/executor/repo.go index d536c4955b..5203190c14 100644 --- a/runner/internal/executor/repo.go +++ b/runner/internal/executor/repo.go @@ -9,6 +9,7 @@ import ( "path/filepath" "github.com/codeclysm/extract/v4" + "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/repo" @@ -193,7 +194,7 @@ func (ex *RunExecutor) restoreRepoDir(tmpDir string) error { func moveDir(srcDir, dstDir string) error { // We cannot just move/rename files because with volumes they'll be on different devices - cmd := exec.Command("cp", "-a", srcDir+"/.", dstDir) + cmd := exec.CommandContext(context.TODO(), "cp", "-a", srcDir+"/.", dstDir) if output, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("failed to cp: %w, output: %s", err, string(output)) } diff --git a/runner/internal/repo/diff.go b/runner/internal/repo/diff.go index ebb4580cbd..43e6b2e20f 100644 --- a/runner/internal/repo/diff.go +++ b/runner/internal/repo/diff.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/bluekeyes/go-gitdiff/gitdiff" + "github.com/dstackai/dstack/runner/internal/log" ) diff --git a/runner/internal/repo/diff_test.go b/runner/internal/repo/diff_test.go index 4aa422b87f..3976c1b957 100644 --- a/runner/internal/repo/diff_test.go +++ b/runner/internal/repo/diff_test.go @@ -220,7 +220,7 @@ LcmZQzWMT#Y01f~L }, { name: "Executable perm", - expMode: 0100, + expMode: 0o100, diff: "diff --git a/original b/original\nold mode 100644\nnew mode 100755\n", }, { @@ -290,8 +290,7 @@ index 9ce1261..f9c7821 100644 }, } - content := - `First line. + content := `First line. Second line. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. diff --git a/runner/internal/repo/manager.go b/runner/internal/repo/manager.go index 5c20a8d274..ac96f50543 100644 --- a/runner/internal/repo/manager.go +++ b/runner/internal/repo/manager.go @@ -4,12 +4,13 @@ import ( "context" "fmt" - "github.com/dstackai/dstack/runner/internal/log" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/transport/http" gitssh "github.com/go-git/go-git/v5/plumbing/transport/ssh" "golang.org/x/crypto/ssh" + + "github.com/dstackai/dstack/runner/internal/log" ) type Manager struct { @@ -62,7 +63,7 @@ func (m *Manager) WithSSHAuth(pem, password string) *Manager { if err != nil { log.Warning(m.ctx, "fail to parse SSH private key", "err", err) } else { - keys.HostKeyCallbackHelper.HostKeyCallback = ssh.InsecureIgnoreHostKey() + keys.HostKeyCallback = ssh.InsecureIgnoreHostKey() m.clo.Auth = keys } return m diff --git a/runner/internal/runner/api/http.go b/runner/internal/runner/api/http.go index c28c5e497c..ac13b5e5b4 100644 --- a/runner/internal/runner/api/http.go +++ b/runner/internal/runner/api/http.go @@ -95,7 +95,7 @@ func (s *Server) uploadArchivePostHandler(w http.ResponseWriter, r *http.Request if err != nil { return nil, fmt.Errorf("read multipart form: %w", err) } - defer part.Close() + defer func() { _ = part.Close() }() fieldName := part.FormName() if fieldName == "" { diff --git a/runner/internal/runner/api/ws.go b/runner/internal/runner/api/ws.go index 2dbd81f072..bc6e476c0e 100644 --- a/runner/internal/runner/api/ws.go +++ b/runner/internal/runner/api/ws.go @@ -6,8 +6,9 @@ import ( "net/http" "time" - "github.com/dstackai/dstack/runner/internal/log" "github.com/gorilla/websocket" + + "github.com/dstackai/dstack/runner/internal/log" ) type logsWsRequestParams struct { @@ -46,7 +47,7 @@ func parseRequestParams(r *http.Request) (logsWsRequestParams, error) { if startTimeStr != "" { t, err := time.Parse(time.RFC3339, startTimeStr) if err != nil { - return logsWsRequestParams{}, errors.New("Failed to parse start_time value") + return logsWsRequestParams{}, errors.New("failed to parse start_time value") } startTimestamp = t.Unix() } @@ -93,7 +94,7 @@ func (s *Server) streamJobLogs(ctx context.Context, conn *websocket.Conn, params for currentPos < len(jobLogsWsHistory) { if err := conn.WriteMessage(websocket.BinaryMessage, jobLogsWsHistory[currentPos].Message); err != nil { s.executor.RUnlock() - log.Error(ctx, "Failed to write message", "err", err) + log.Error(ctx, "failed to write message", "err", err) return } currentPos++ diff --git a/runner/internal/shim/backends/aws.go b/runner/internal/shim/backends/aws.go index 366d21d728..1fe7fb890a 100644 --- a/runner/internal/shim/backends/aws.go +++ b/runner/internal/shim/backends/aws.go @@ -2,6 +2,7 @@ package backends import ( "bytes" + "context" "fmt" "os" "os/exec" @@ -26,7 +27,7 @@ func NewAWSBackend() *AWSBackend { func (e *AWSBackend) GetRealDeviceName(volumeID, deviceName string) (string, error) { // Run the lsblk command to get block device information // On AWS, SERIAL contains volume id. - cmd := exec.Command("lsblk", "-o", "NAME,SERIAL") + cmd := exec.CommandContext(context.TODO(), "lsblk", "-o", "NAME,SERIAL") var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { @@ -67,7 +68,7 @@ func (e *AWSBackend) GetRealDeviceName(volumeID, deviceName string) (string, err } // Run lsblk again to check for partitions on the base device - cmd = exec.Command("lsblk", "-ln", "-o", "NAME", baseDevice) + cmd = exec.CommandContext(context.TODO(), "lsblk", "-ln", "-o", "NAME", baseDevice) out.Reset() cmd.Stdout = &out if err := cmd.Run(); err != nil { diff --git a/runner/internal/shim/dcgm/exporter.go b/runner/internal/shim/dcgm/exporter.go index de4ac939ee..f49fb91aee 100644 --- a/runner/internal/shim/dcgm/exporter.go +++ b/runner/internal/shim/dcgm/exporter.go @@ -16,6 +16,7 @@ import ( "time" "github.com/alexellis/go-execute/v2" + "github.com/dstackai/dstack/runner/internal/log" ) @@ -141,7 +142,7 @@ func (c *DCGMExporter) Stop(context.Context) error { return errors.New("not started") } c.cancel() - os.Remove(c.configPath) + _ = os.Remove(c.configPath) return c.cmd.Wait() } @@ -163,7 +164,7 @@ func (c *DCGMExporter) Fetch(ctx context.Context) ([]byte, error) { if err != nil { return nil, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("status is not OK: %d", resp.StatusCode) } diff --git a/runner/internal/shim/docker.go b/runner/internal/shim/docker.go index d566453a29..04519f6dd7 100644 --- a/runner/internal/shim/docker.go +++ b/runner/internal/shim/docker.go @@ -29,13 +29,14 @@ import ( "github.com/docker/docker/pkg/stdcopy" "github.com/docker/go-connections/nat" "github.com/docker/go-units" + bytesize "github.com/inhies/go-bytesize" + "github.com/dstackai/dstack/runner/consts" "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/log" "github.com/dstackai/dstack/runner/internal/shim/backends" "github.com/dstackai/dstack/runner/internal/shim/host" "github.com/dstackai/dstack/runner/internal/types" - bytesize "github.com/inhies/go-bytesize" ) // TODO: Allow for configuration via cli arguments or environment variables. @@ -135,7 +136,7 @@ func (d *DockerRunner) restoreStateFromContainers(ctx context.Context) error { } else { switch d.gpuVendor { case common.GpuVendorNvidia: - deviceRequests := containerFull.HostConfig.Resources.DeviceRequests + deviceRequests := containerFull.HostConfig.DeviceRequests if len(deviceRequests) == 1 { gpuIDs = deviceRequests[0].DeviceIDs } else if len(deviceRequests) != 0 { @@ -146,13 +147,13 @@ func (d *DockerRunner) restoreStateFromContainers(ctx context.Context) error { ) } case common.GpuVendorAmd: - for _, device := range containerFull.HostConfig.Resources.Devices { + for _, device := range containerFull.HostConfig.Devices { if host.IsRenderNodePath(device.PathOnHost) { gpuIDs = append(gpuIDs, device.PathOnHost) } } case common.GpuVendorTenstorrent: - for _, device := range containerFull.HostConfig.Resources.Devices { + for _, device := range containerFull.HostConfig.Devices { if strings.HasPrefix(device.PathOnHost, "/dev/tenstorrent/") { // Extract the device ID from the path deviceID := strings.TrimPrefix(device.PathOnHost, "/dev/tenstorrent/") @@ -534,12 +535,12 @@ func unmountVolumes(ctx context.Context, taskConfig TaskConfig) error { var failed []string for _, volume := range taskConfig.Volumes { mountPoint := getVolumeMountPoint(volume.Name) - cmd := exec.Command("mountpoint", mountPoint) + cmd := exec.CommandContext(ctx, "mountpoint", mountPoint) if output, err := cmd.CombinedOutput(); err != nil { log.Info(ctx, "skipping", "mountpoint", mountPoint, "output", output) continue } - cmd = exec.Command("umount", "-qf", mountPoint) + cmd = exec.CommandContext(ctx, "umount", "-qf", mountPoint) if output, err := cmd.CombinedOutput(); err != nil { log.Error(ctx, "failed to unmount", "mountpoint", mountPoint, "output", output) failed = append(failed, mountPoint) @@ -617,7 +618,7 @@ func prepareInstanceMountPoints(taskConfig TaskConfig) error { // Returns true if the file system is created. func initFileSystem(ctx context.Context, deviceName string, errorIfNotExists bool) (bool, error) { // Run the lsblk command to get filesystem type - cmd := exec.Command("lsblk", "-no", "FSTYPE", deviceName) + cmd := exec.CommandContext(ctx, "lsblk", "-no", "FSTYPE", deviceName) var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { @@ -635,7 +636,7 @@ func initFileSystem(ctx context.Context, deviceName string, errorIfNotExists boo } log.Debug(ctx, "formatting disk with ext4 filesystem...", "device", deviceName) - cmd = exec.Command("mkfs.ext4", "-F", deviceName) + cmd = exec.CommandContext(ctx, "mkfs.ext4", "-F", deviceName) if output, err := cmd.CombinedOutput(); err != nil { return false, fmt.Errorf("failed to format disk: %w, output: %s", err, string(output)) } @@ -654,7 +655,7 @@ func mountDisk(ctx context.Context, deviceName, mountPoint string, fsRootPerms o // Mount the disk to the mount point log.Debug(ctx, "mounting disk...", "device", deviceName, "mountpoint", mountPoint) - cmd := exec.Command("mount", deviceName, mountPoint) + cmd := exec.CommandContext(ctx, "mount", deviceName, mountPoint) if output, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("failed to mount disk: %w, output: %s", err, string(output)) } @@ -699,7 +700,7 @@ func pullImage(ctx context.Context, client docker.APIClient, taskConfig TaskConf if err != nil { return fmt.Errorf("pull image: %w", err) } - defer reader.Close() + defer func() { _ = reader.Close() }() logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) if err != nil { @@ -848,8 +849,8 @@ func (d *DockerRunner) createContainer(ctx context.Context, task *Task) error { ShmSize: task.config.ShmSize, Tmpfs: tmpfs, } - hostConfig.Resources.NanoCPUs = int64(task.config.CPU * 1000000000) - hostConfig.Resources.Memory = task.config.Memory + hostConfig.NanoCPUs = int64(task.config.CPU * 1000000000) + hostConfig.Memory = task.config.Memory if len(task.gpuIDs) > 0 { if len(task.config.GPUDevices) > 0 { configureGpuDevices(hostConfig, task.config.GPUDevices) @@ -1034,8 +1035,8 @@ func getNetworkMode(networkMode NetworkMode) container.NetworkMode { func configureGpuDevices(hostConfig *container.HostConfig, gpuDevices []GPUDevice) { for _, gpuDevice := range gpuDevices { - hostConfig.Resources.Devices = append( - hostConfig.Resources.Devices, + hostConfig.Devices = append( + hostConfig.Devices, container.DeviceMapping{ PathOnHost: gpuDevice.PathOnHost, PathInContainer: gpuDevice.PathInContainer, @@ -1051,8 +1052,8 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v // Tenstorrent: ids are device indices to be used with /dev/tenstorrent/ switch vendor { case common.GpuVendorNvidia: - hostConfig.Resources.DeviceRequests = append( - hostConfig.Resources.DeviceRequests, + hostConfig.DeviceRequests = append( + hostConfig.DeviceRequests, container.DeviceRequest{ // Request all capabilities to maximize compatibility with all sorts of GPU workloads. // Default capabilities: utility, compute. @@ -1065,8 +1066,8 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v // All options are listed here: https://hub.docker.com/r/rocm/pytorch // Only --device are mandatory, other seem to be performance-related. // --device=/dev/kfd - hostConfig.Resources.Devices = append( - hostConfig.Resources.Devices, + hostConfig.Devices = append( + hostConfig.Devices, container.DeviceMapping{ PathOnHost: "/dev/kfd", PathInContainer: "/dev/kfd", @@ -1075,8 +1076,8 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v ) // --device=/dev/dri/renderD for _, renderNodePath := range ids { - hostConfig.Resources.Devices = append( - hostConfig.Resources.Devices, + hostConfig.Devices = append( + hostConfig.Devices, container.DeviceMapping{ PathOnHost: renderNodePath, PathInContainer: renderNodePath, @@ -1095,8 +1096,8 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v // For Tenstorrent, simply add each device for _, id := range ids { devicePath := fmt.Sprintf("/dev/tenstorrent/%s", id) - hostConfig.Resources.Devices = append( - hostConfig.Resources.Devices, + hostConfig.Devices = append( + hostConfig.Devices, container.DeviceMapping{ PathOnHost: devicePath, PathInContainer: devicePath, @@ -1131,8 +1132,8 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v func configureHpcNetworkingIfAvailable(hostConfig *container.HostConfig) { // Although AWS EFA is not InfiniBand, EFA adapters are exposed as /dev/infiniband/uverbsN (N=0,1,...) if _, err := os.Stat("/dev/infiniband"); !errors.Is(err, os.ErrNotExist) { - hostConfig.Resources.Devices = append( - hostConfig.Resources.Devices, + hostConfig.Devices = append( + hostConfig.Devices, container.DeviceMapping{ PathOnHost: "/dev/infiniband", PathInContainer: "/dev/infiniband", @@ -1181,7 +1182,7 @@ func getContainerLastLogs(ctx context.Context, client docker.APIClient, containe if err != nil { return nil, err } - defer muxedReader.Close() + defer func() { _ = muxedReader.Close() }() demuxedBuffer := new(bytes.Buffer) // Using the same Writer for both stdout and stderr should be roughly equivalent to 2>&1 diff --git a/runner/internal/shim/docker_test.go b/runner/internal/shim/docker_test.go index f6628e404b..a1bdfd2d8d 100644 --- a/runner/internal/shim/docker_test.go +++ b/runner/internal/shim/docker_test.go @@ -54,7 +54,7 @@ func TestDocker_SSHServerConnect(t *testing.T) { t.Parallel() tempDir := t.TempDir() - require.NoError(t, exec.Command("ssh-keygen", "-t", "rsa", "-b", "2048", "-f", tempDir+"/id_rsa", "-q", "-N", "").Run()) + require.NoError(t, exec.CommandContext(t.Context(), "ssh-keygen", "-t", "rsa", "-b", "2048", "-f", tempDir+"/id_rsa", "-q", "-N", "").Run()) publicBytes, err := os.ReadFile(tempDir + "/id_rsa.pub") require.NoError(t, err) @@ -84,7 +84,9 @@ func TestDocker_SSHServerConnect(t *testing.T) { }() for i := 0; i < timeout; i++ { - cmd := exec.Command("ssh", + cmd := exec.CommandContext( + t.Context(), + "ssh", "-F", "none", "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null", diff --git a/runner/internal/shim/host/gpu.go b/runner/internal/shim/host/gpu.go index 6cae448e62..b2b2135efc 100644 --- a/runner/internal/shim/host/gpu.go +++ b/runner/internal/shim/host/gpu.go @@ -12,6 +12,7 @@ import ( "strings" execute "github.com/alexellis/go-execute/v2" + "github.com/dstackai/dstack/runner/internal/common" "github.com/dstackai/dstack/runner/internal/log" ) diff --git a/runner/internal/shim/host/host.go b/runner/internal/shim/host/host.go index d2c9c030cc..bc54a407c7 100644 --- a/runner/internal/shim/host/host.go +++ b/runner/internal/shim/host/host.go @@ -6,9 +6,10 @@ import ( "net" "runtime" - "github.com/dstackai/dstack/runner/internal/log" "github.com/shirou/gopsutil/v4/mem" "golang.org/x/sys/unix" + + "github.com/dstackai/dstack/runner/internal/log" ) func GetCpuCount(ctx context.Context) int {