diff --git a/.gitignore b/.gitignore index 488af727..97f92b00 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ cmd/keygen/keygen cmd/keyforge/keyforge cmd/bootstrap-limiter/bootstrap-limiter +./tmp-* dist/ runtime/ *.env diff --git a/api/b7s-swagger.yaml b/api/b7s-swagger.yaml index d1b5441c..855709fe 100644 --- a/api/b7s-swagger.yaml +++ b/api/b7s-swagger.yaml @@ -84,7 +84,6 @@ paths: description: Invalid execution request '500': description: Internal server error - /api/v1/functions/requests/result: post: @@ -111,6 +110,30 @@ paths: '500': description: Internal server error + /api/v1/functions/execute/batch/result: + post: + tags: + - functions + summary: Get the result of a Batch Execution Request + description: Get the result of a Batch Execution Request + operationId: batchExecutionResult + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/BatchResultRequest' + required: true + responses: + '200': + description: Batch Execution result retrieved + content: + application/json: + schema: + $ref: '#/components/schemas/BatchExecutionResult' + '400': + description: Invalid request + '500': + description: Internal server error /api/v1/functions/install: post: @@ -158,9 +181,18 @@ components: example: hello-world.wasm description: Name of the WASM file to execute x-go-type-skip-optional-pointer: true - parameters: + arguments: type: array description: CLI arguments for the Bless Function + items: + type: string + example: + - [ "--arg", "first-invocation-argument", "other arguments..." ] + x-go-type-skip-optional-pointer: true + parameters: + type: array + deprecated: true + x-deprecated-reason: Use the `arguments` field. items: $ref: '#/components/schemas/ExecutionParameter' example: @@ -545,7 +577,6 @@ components: - [ "--arg", "second-invocation-argument", "other arguments..." ] - [ "--arg", "third-invocation-argument", "other arguments..." 
] x-go-type-skip-optional-pointer: true - TemplateExecutionRequest: required: @@ -567,8 +598,16 @@ components: config: $ref: '#/components/schemas/ExecutionConfig' - BatchExecutionResponse: + type: object + properties: + request_id: + description: ID of the Execution Request + type: string + example: b6fbbc5e-1d16-4ea9-b557-51f4a6ab565c + x-go-type-skip-optional-pointer: true + + BatchExecutionResult: type: object properties: code: @@ -585,15 +624,15 @@ components: description: If the Batch Execution Request failed, this message might have more info about the error type: string x-go-type-skip-optional-pointer: true - strands: + chunks: description: Results of the execution of the Batch Request, executed by different nodes x-go-type-skip-optional-pointer: true additionalProperties: - $ref: '#/components/schemas/StrandResults' + $ref: '#/components/schemas/ChunkResults' - StrandResults: + ChunkResults: type: object - x-go-type: response.NodeStrandResults + x-go-type: response.NodeChunkResults x-go-type-import: path: github.com/blessnetwork/b7s/models/response properties: @@ -634,8 +673,15 @@ components: type: object x-go-type-skip-optional-pointer: true - - - - - \ No newline at end of file + BatchResultRequest: + description: Get the result of a Batch Execution Request, identified by the request ID + type: object + required: + - id + x-go-type-skip-optional-pointer: true + properties: + id: + description: ID of the Batch Execution Request + type: string + example: b6fbbc5e-1d16-4ea9-b557-51f4a6ab565c + x-go-type-skip-optional-pointer: true diff --git a/api/batch_result.go b/api/batch_result.go new file mode 100644 index 00000000..ad2fe502 --- /dev/null +++ b/api/batch_result.go @@ -0,0 +1,31 @@ +package api + +import ( + "fmt" + "net/http" + + "github.com/labstack/echo/v4" +) + +func (a *API) BatchExecutionResult(ctx echo.Context) error { + + var req BatchResultRequest + err := ctx.Bind(&req) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, 
fmt.Errorf("could not unpack request: %w", err)) + } + + res, err := a.Node.GetBatchResults(ctx.Request().Context(), req.Id) + if err != nil { + return echo.NewHTTPError(http.StatusInternalServerError, fmt.Errorf("could not retrieve batch execution result: %w", err)) + } + + out := BatchExecutionResult{ + RequestId: res.RequestID, + Code: res.Code.String(), + Message: res.ErrorMessage, + Chunks: res.Chunks, + } + + return ctx.JSON(http.StatusOK, out) +} diff --git a/api/client.gen.go b/api/client.gen.go index df7632e8..4b425e49 100644 --- a/api/client.gen.go +++ b/api/client.gen.go @@ -1,6 +1,6 @@ // Package api provides primitives to interact with the openapi HTTP API. // -// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.4.1 DO NOT EDIT. +// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.5.0 DO NOT EDIT. package api import ( @@ -97,6 +97,11 @@ type ClientInterface interface { ExecuteFunctionBatch(ctx context.Context, body ExecuteFunctionBatchJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + // BatchExecutionResultWithBody request with any body + BatchExecutionResultWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + BatchExecutionResult(ctx context.Context, body BatchExecutionResultJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + // InstallFunctionWithBody request with any body InstallFunctionWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -159,6 +164,30 @@ func (c *Client) ExecuteFunctionBatch(ctx context.Context, body ExecuteFunctionB return c.Client.Do(req) } +func (c *Client) BatchExecutionResultWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewBatchExecutionResultRequestWithBody(c.Server, contentType, body) + if err != nil { + 
return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) BatchExecutionResult(ctx context.Context, body BatchExecutionResultJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewBatchExecutionResultRequest(c.Server, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) InstallFunctionWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewInstallFunctionRequestWithBody(c.Server, contentType, body) if err != nil { @@ -299,6 +328,46 @@ func NewExecuteFunctionBatchRequestWithBody(server string, contentType string, b return req, nil } +// NewBatchExecutionResultRequest calls the generic BatchExecutionResult builder with application/json body +func NewBatchExecutionResultRequest(server string, body BatchExecutionResultJSONRequestBody) (*http.Request, error) { + var bodyReader io.Reader + buf, err := json.Marshal(body) + if err != nil { + return nil, err + } + bodyReader = bytes.NewReader(buf) + return NewBatchExecutionResultRequestWithBody(server, "application/json", bodyReader) +} + +// NewBatchExecutionResultRequestWithBody generates requests for BatchExecutionResult with any type of body +func NewBatchExecutionResultRequestWithBody(server string, contentType string, body io.Reader) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/api/v1/functions/execute/batch/result") + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + // NewInstallFunctionRequest calls the generic InstallFunction builder with application/json body func NewInstallFunctionRequest(server string, body InstallFunctionJSONRequestBody) (*http.Request, error) { var bodyReader io.Reader @@ -459,6 +528,11 @@ type ClientWithResponsesInterface interface { ExecuteFunctionBatchWithResponse(ctx context.Context, body ExecuteFunctionBatchJSONRequestBody, reqEditors ...RequestEditorFn) (*ExecuteFunctionBatchResponse, error) + // BatchExecutionResultWithBodyWithResponse request with any body + BatchExecutionResultWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*BatchExecutionResultResponse, error) + + BatchExecutionResultWithResponse(ctx context.Context, body BatchExecutionResultJSONRequestBody, reqEditors ...RequestEditorFn) (*BatchExecutionResultResponse, error) + // InstallFunctionWithBodyWithResponse request with any body InstallFunctionWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*InstallFunctionResponse, error) @@ -517,6 +591,28 @@ func (r ExecuteFunctionBatchResponse) StatusCode() int { return 0 } +type BatchExecutionResultResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *BatchExecutionResult +} + +// Status returns HTTPResponse.Status +func (r BatchExecutionResultResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r BatchExecutionResultResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + type 
InstallFunctionResponse struct { Body []byte HTTPResponse *http.Response @@ -617,6 +713,23 @@ func (c *ClientWithResponses) ExecuteFunctionBatchWithResponse(ctx context.Conte return ParseExecuteFunctionBatchResponse(rsp) } +// BatchExecutionResultWithBodyWithResponse request with arbitrary body returning *BatchExecutionResultResponse +func (c *ClientWithResponses) BatchExecutionResultWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*BatchExecutionResultResponse, error) { + rsp, err := c.BatchExecutionResultWithBody(ctx, contentType, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseBatchExecutionResultResponse(rsp) +} + +func (c *ClientWithResponses) BatchExecutionResultWithResponse(ctx context.Context, body BatchExecutionResultJSONRequestBody, reqEditors ...RequestEditorFn) (*BatchExecutionResultResponse, error) { + rsp, err := c.BatchExecutionResult(ctx, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseBatchExecutionResultResponse(rsp) +} + // InstallFunctionWithBodyWithResponse request with arbitrary body returning *InstallFunctionResponse func (c *ClientWithResponses) InstallFunctionWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*InstallFunctionResponse, error) { rsp, err := c.InstallFunctionWithBody(ctx, contentType, body, reqEditors...) 
@@ -712,6 +825,32 @@ func ParseExecuteFunctionBatchResponse(rsp *http.Response) (*ExecuteFunctionBatc return response, nil } +// ParseBatchExecutionResultResponse parses an HTTP response from a BatchExecutionResultWithResponse call +func ParseBatchExecutionResultResponse(rsp *http.Response) (*BatchExecutionResultResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &BatchExecutionResultResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest BatchExecutionResult + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + } + + return response, nil +} + // ParseInstallFunctionResponse parses an HTTP response from a InstallFunctionWithResponse call func ParseInstallFunctionResponse(rsp *http.Response) (*InstallFunctionResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) diff --git a/api/execute.go b/api/execute.go index 2cd57ce1..66d9f39d 100644 --- a/api/execute.go +++ b/api/execute.go @@ -26,18 +26,22 @@ func (a *API) ExecuteFunction(ctx echo.Context) error { Config: req.Config, FunctionID: req.FunctionId, Method: req.Method, - // TODO: Init arguments here, update API models. + Arguments: req.Arguments, } - var args []string + // Maintain backwards compatibility: if someone provided function arguments via the `parameters` field + // use them, but only if the current, newer field (arguments) was not used. + // In the future remove 'parameters' field. + if len(exr.Arguments) == 0 && len(req.Parameters) > 0 { - // Maintain backwards compatibility - if someone provided function arguments via the `parameters` field. 
- if len(req.Parameters) > 0 { + var args []string for _, p := range req.Parameters { args = append(args, p.Value) + } + + exr.Arguments = args } - exr.Arguments = args err = exr.Valid() if err != nil { diff --git a/api/execute_batch.go b/api/execute_batch.go index 9b791495..e96bbc49 100644 --- a/api/execute_batch.go +++ b/api/execute_batch.go @@ -6,6 +6,7 @@ import ( "net/http" "github.com/blessnetwork/b7s/models/request" + "github.com/blessnetwork/b7s/telemetry/tracing" "github.com/labstack/echo/v4" ) @@ -27,18 +28,18 @@ func (a *API) ExecuteFunctionBatch(ctx echo.Context) error { Arguments: req.Arguments, } + // Start a background context but include trace info. + ectx := tracing.TraceContext(context.Background(), tracing.GetTraceInfo(ctx.Request().Context())) + // Background context because we don't want our request to be cancelled if the HTTP request gets cancelled. - res, err := a.Node.ExecuteFunctionBatch(context.Background(), exr) + id, err := a.Node.StartFunctionBatchExecution(ectx, exr) if err != nil { return echo.NewHTTPError(http.StatusInternalServerError, fmt.Errorf("batch execution failed: %w", err)) } - out := BatchExecutionResponse{ - RequestId: res.RequestID, - Code: res.Code.String(), - Message: res.ErrorMessage, - Strands: res.Strands, - } - - return ctx.JSON(http.StatusOK, out) + return ctx.JSON(http.StatusOK, + BatchExecutionResponse{ + RequestId: id, + }, + ) } diff --git a/api/models.gen.go b/api/models.gen.go index c0c2b5c0..4baecaab 100644 --- a/api/models.gen.go +++ b/api/models.gen.go @@ -1,6 +1,6 @@ // Package api provides primitives to interact with the openapi HTTP API. // -// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.4.1 DO NOT EDIT. +// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.5.0 DO NOT EDIT. package api import ( @@ -30,6 +30,15 @@ type BatchExecutionRequest struct { // BatchExecutionResponse defines model for BatchExecutionResponse. 
type BatchExecutionResponse struct { + // RequestId ID of the Execution Request + RequestId string `json:"request_id,omitempty"` +} + +// BatchExecutionResult defines model for BatchExecutionResult. +type BatchExecutionResult struct { + // Chunks Results of the execution of the Batch Request, executed by different nodes + Chunks map[string]ChunkResults `json:"chunks,omitempty"` + // Code Status of the batch execution Code string `json:"code,omitempty"` @@ -38,11 +47,17 @@ type BatchExecutionResponse struct { // RequestId ID of the Execution Request RequestId string `json:"request_id,omitempty"` +} - // Strands Results of the execution of the Batch Request, executed by different nodes - Strands map[string]StrandResults `json:"strands,omitempty"` +// BatchResultRequest Get the result of an Batch Execution Request, identified by the request ID +type BatchResultRequest struct { + // Id ID of the Batch Execution Request + Id string `json:"id"` } +// ChunkResults defines model for ChunkResults. +type ChunkResults = response.NodeChunkResults + // ExecutionConfig Configuration options for the Execution Request type ExecutionConfig = execute.Config @@ -51,6 +66,9 @@ type ExecutionParameter = execute.Parameter // ExecutionRequest defines model for ExecutionRequest. type ExecutionRequest struct { + // Arguments CLI arguments for the Bless Function + Arguments []string `json:"arguments,omitempty"` + // Config Configuration options for the Execution Request Config ExecutionConfig `json:"config,omitempty"` @@ -59,8 +77,7 @@ type ExecutionRequest struct { // Method Name of the WASM file to execute Method string `json:"method"` - - // Parameters CLI arguments for the Bless Function + // Deprecated: Use the `arguments` field. 
Parameters []ExecutionParameter `json:"parameters,omitempty"` // Topic In the scenario where workers form subgroups, you can target a specific subgroup by specifying its identifier @@ -132,9 +149,6 @@ type ResultAggregation = execute.ResultAggregation // RuntimeConfig Configuration options for the Bless Runtime type RuntimeConfig = execute.BLSRuntimeConfig -// StrandResults defines model for StrandResults. -type StrandResults = response.NodeStrandResults - // TemplateExecutionRequest defines model for TemplateExecutionRequest. type TemplateExecutionRequest struct { // Config Configuration options for the Execution Request @@ -153,6 +167,9 @@ type ExecuteFunctionJSONRequestBody = ExecutionRequest // ExecuteFunctionBatchJSONRequestBody defines body for ExecuteFunctionBatch for application/json ContentType. type ExecuteFunctionBatchJSONRequestBody = BatchExecutionRequest +// BatchExecutionResultJSONRequestBody defines body for BatchExecutionResult for application/json ContentType. +type BatchExecutionResultJSONRequestBody = BatchResultRequest + // InstallFunctionJSONRequestBody defines body for InstallFunction for application/json ContentType. type InstallFunctionJSONRequestBody = FunctionInstallRequest diff --git a/api/node.go b/api/node.go index 112b8292..4b54e5a0 100644 --- a/api/node.go +++ b/api/node.go @@ -9,14 +9,15 @@ import ( "github.com/blessnetwork/b7s/models/response" ) -// TODO: ExecutionFunctionBatch makes a detour from the established approach +// NOTE: StartFunctionBatchExecution and GetBatchResults make a detour from the established approach // by directly using the request/response types. Consider if // other handlers should do the same, bringing down REST API handlers closer // to their p2p counterpart, which is what REST API is trying to emulate. 
type Node interface { ExecuteFunction(ctx context.Context, req execute.Request, subgroup string) (code codes.Code, requestID string, results execute.ResultMap, peers execute.Cluster, err error) - ExecuteFunctionBatch(ctx context.Context, req request.ExecuteBatch) (*response.ExecuteBatch, error) + StartFunctionBatchExecution(ctx context.Context, req request.ExecuteBatch) (string, error) + GetBatchResults(ctx context.Context, id string) (*response.ExecuteBatch, error) ExecutionResult(id string) (execute.ResultMap, bool) PublishFunctionInstall(ctx context.Context, uri string, cid string, subgroup string) error } diff --git a/api/server.gen.go b/api/server.gen.go index 990a6afc..e7cfac4a 100644 --- a/api/server.gen.go +++ b/api/server.gen.go @@ -1,6 +1,6 @@ // Package api provides primitives to interact with the openapi HTTP API. // -// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.4.1 DO NOT EDIT. +// Code generated by github.com/oapi-codegen/oapi-codegen/v2 version v2.5.0 DO NOT EDIT. package api import ( @@ -24,6 +24,9 @@ type ServerInterface interface { // Execute a Bless Function with a batch of arguments // (POST /api/v1/functions/execute/batch) ExecuteFunctionBatch(ctx echo.Context) error + // Get the result of a Batch Execution Request + // (POST /api/v1/functions/execute/batch/result) + BatchExecutionResult(ctx echo.Context) error // Install a Bless Function // (POST /api/v1/functions/install) InstallFunction(ctx echo.Context) error @@ -58,6 +61,15 @@ func (w *ServerInterfaceWrapper) ExecuteFunctionBatch(ctx echo.Context) error { return err } +// BatchExecutionResult converts echo context to params. +func (w *ServerInterfaceWrapper) BatchExecutionResult(ctx echo.Context) error { + var err error + + // Invoke the callback with all the unmarshaled arguments + err = w.Handler.BatchExecutionResult(ctx) + return err +} + // InstallFunction converts echo context to params. 
func (w *ServerInterfaceWrapper) InstallFunction(ctx echo.Context) error { var err error @@ -115,6 +127,7 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL router.POST(baseURL+"/api/v1/functions/execute", wrapper.ExecuteFunction) router.POST(baseURL+"/api/v1/functions/execute/batch", wrapper.ExecuteFunctionBatch) + router.POST(baseURL+"/api/v1/functions/execute/batch/result", wrapper.BatchExecutionResult) router.POST(baseURL+"/api/v1/functions/install", wrapper.InstallFunction) router.POST(baseURL+"/api/v1/functions/requests/result", wrapper.ExecutionResult) router.GET(baseURL+"/api/v1/health", wrapper.Health) @@ -124,51 +137,54 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xbW2/bOhL+KwR3H2U7cW5o3tI03Qbb02aTgxa7B4FBSSOJjUSqJOXELfzfF7zoYku+", - "xqnbgz41pShyNPPNffwdBzzLOQOmJD7/jmWQQEbMnxdxLCAmCsJbkEWq9FoIMhA0V5QzfI7tOuIRIgxd", - "PUFQ6AfoFr4WIBX2cC54DkJRMAdGQj9gwaR90tvykT5MJVQiYc8mGWcxImmKGA9BIpUQhcBcBSFSCSBR", - "3QZPJMtTwOcH/dNTD6tJDvgcsyLzQWAPP/Vi3nOLUcqJOj1urvbkA8173FBE0l7OKVMg8LkSBUw9nAMI", - "2Sb8PfXzYY6u30hLOaAPNZ0xV82PaZL4Fz4cvjn6N+efb/Oji08PZ19VMLwYnz7Rr/HFN3L4P148yP+Q", - "/wZ3w2D84dXxw7u7S06wt81rPr73MFWQGfodB6QSlMV4WvGJCEEmGzBEVKD4p4AIn+N/DGooDRyOBhUq", - "HIam9YXc/wKBmhMMKUHXvy15VhNEs5wLc2VOVILPcUxVUvj9gGcDPwUpGahHLh4G/pkcaLwMquP0h677", - "ZfPA7xS7NLgvGP1agJNvBYEuVaj4v4xbLZVbIp4OZsm9cEspQf1CwYVSIBXv0hLNBioAyRwCGtEAkXIv", - "IhKNeREkWrvmDQaQIOlUuZvhDboBEKXe6Y0oIywkiotJdXqT7fvTvF0pHGcw4tFa/KjZ+5iAAPRozaQW", - "AVEoBaLRy+DvZJBW2BXnMvodaN1aZzIeQioH7uhNdOY1UUHSMI3WSOhbZ/BPRFxkpWeeFfrl+2tUPUYR", - "F8bwvNYkorcFC8y2pnj/wr0eETH2cESFVD3KxjwgeluvPAd7mKsERH1wv9/H9179qoSAs3C7d1VCxSav", - "NiGyJla21y0FWZ5qw7fCQP/p9rVkp8/gOQ3akrpmRjQyAEYE5aVGcvEAwkguQ7LwY8GLXHpowgsUEIYU", - "ETEoRGqTWW5C/sQtTiiLEVUS0RCYohE1Yc4sizZQH2chQm0LKn54DRDez6tYB5JlzpmENpQDHkKbN3eK", - 
"qKJym74+ywV3c/DFw4ODrb/NwxlISeIOAq7tzeYr2m4bRYSmEHo2iHOHoIzGiUIJGQPKuABEWcQR8Xmh", - "zFkgBBfPoNXFsyMadpD7ZlmIUXPLP418PziB3mF4eNo7BvKq55+cnPVODqNjckr8k9OT4BkkSiUIC62J", - "CkNqd93MyHuJDt2Zt7siluW3duUeFXYq1JQLVqSOOV6dMvgTFNIoAgFM2ZwCL/Ecm7mcqYcruVxyFtG4", - "w3Cb9UIQS6xZrg346iSKlA5sZTCpE5GLevfUw4HWTSYLOSJpzAVVSdYm8HNCgwRVW1G1FcmEF2mIfDBW", - "C0JHNZULdDb3I/UMlAEbj8akK5y8YmMqONNWCY2JoES75sVOcK3Q+wPJIPxE0gKe4UZsrjni0cgiq0X5", - "B7NBY7SRzjq+OoiucOSHFXH66ng2s12Zw4qMSqkR1ybtpn7YhmNnJI0TpXJ5PhiQnPbdqo6UsLfjJHNU", - "JiaG0uVytHbhovGCPqZgimYrvfut3eZU1xi6kLJOt8VCIkJ0zfJCrRF9rf3GtrqiEgEy4WmHz7jhomkX", - "27gTxmmH6JGqBJGy8KK4sQE0hHkdR7IIApAyKtJuYLYrLquopxnwoqPA9I4/opSzuCR11tIr8gB4a3VY", - "M2FwYNhDklAp3w0RJAPzaD6sGhuD1QqMt4z67Gn36zGmpmqfvFmYOwWV/12rPFXrfOS0sTMAu6wjsKhL", - "z30STXygZHg8Pg6+kbHKv4yHAT/6cnLMj8nJNxUWX4N8MqEMxJeYBU9nciiHQ3kG5FnhrUp4B7Xap5Xk", - "fr64+wNFNAWt2SXHm6QnkKa898hFGvYficyeQU9eQmMXiWsFctzrBSntRSmJD/HUq9fNv7NL9dZhe+sQ", - "T+/XjAo6dPAZSebPkSDWIt9VtthUmQqN99uG1TPavTCfTAvpLOKqGPjSbTUB8OosdB/55+/Mc6N4cOM6", - "ulzed9gAjp19sItAFSRFvFB50ZGSeiilD4CqGPCj2efVC1dacB66eqIKXfIQEKig32+Xw5+oGnVD2Lyq", - "H3WhePs0PwQhloS/hu4d39gZB86xbndXrhkEusTA3r6PeKf0jNdMKpKmi6OeXydoWewNyS/lCz1cCDqb", - "F+/Kr2ppPsONtkCzqji7G3c3fT7F1tA2UD6LkX+Bcq3/ZfMHXi1qU/ZrDAug6zct6/rTer05UOwGEyWH", - "f8dXv+OrfcdX74CkKrEo6UhidVQj7UPvZzVcjepxOzpEDzDpmcwT5YSK1lcwks19hVnZXopVVag+0S7t", - "yAQ58jYqF12x8Seyl1rRXB+kLZ/qmS2LNowUi5GFny0/asvQ1ZABDU+d/9ZMaum3RFQhBoE2NWJS32TO", - "r5s6iAhwAzG2v+JPtHNrTLRUMo1IKqFivs95CoRtgBLSnNdZqu3tmYkSZJYDafoxMpWa1YxtsXPqfV+X", - "4vUbOPcbj4PIfSHzsna084GwDndtm7D2Qc4vz3WQGgORZtqwu3WYka5mhhsfqh3QjaCZhqg+vyrQlfFA", - "0xdtOTC0dVVx4QSmpX9uApMyy42a8n3Pgm31WrC7EbJ1Ox4Vw364QrSbd63AFBjx02bLY3PDN1ue3rQp", - "/P2FSw0tFuxDDjON0A1nGGwt3x3RMkIh+EU80pH1s2QYCjoGIUeCczWyvPj+jEkDJSZzre3ddUOiAtIG", - "dZu37VMex9ZDbJ8fZVx0jN3/YdZRSjOquqc5tiZaFGxU9tx/uqbu6/d3sxDfg44tHCX83cr8aVqZL9rn", - "0qfDkwLBSPqGBx2hzVvKQqQjP1ONsEHg3SOJrUYUInUzOOeDgbTLfcq14EvzOnvcn1q5qUSvz+7QOyCh", - 
"DcbvQIxBIJ9ICBG3vciPObCLm2t01D+oaqrG2Pc1K6kyXNbHmBNuQSqkt/eaL+oEEYS0Vx/0j/uvNGU8", - "B0Zyis/xUf+gf6TdA1GJ+fYByelgfDgouVxrldYI3lUIvHIjU6TdQNYKZEi+DuuNjecuSH7Nw4nTMQXM", - "XEHyPHWfO/gibQxilWyDH7u4qeBpa25xCck11OpqjCnPGfYMDw5eglBXAOyg9K4a82l4hKmHjy0h89nK", - "mKQ0bBhyUU9Gn3S/YaGPpAWgra5pMmSR6QRkObMUiWVTJyU2Sd9CEA3MdPHmUCrHoh6pztN1vG3qzdVQ", - "9AqkmTHUF4Jb968INsCcnbj+ochbMC/+a8GvxIQdWJ+Dw9rApLYtsxiSrm+z2rq5jS9s3Rb0IDtEt4Tw", - "H4e0Rd2vxfSSJnAQCR4Yf0whjCGcw8WS71tb+u4a/Uc5VNCNgvWaXQvsUD208LKYmG3YTWcDpx8l6bme", - "1kJDaKVsuClACQpjLeEVlmUX9mRtUa5AUWIaJpqGGDoQc5lA8GBDM7dzHhzvyuUXk8lMT6dDEoY6Kh2B", - "kzlGdX1ByRO3cG8OdYstZz4GMVEJZbGLmmf1VOKNIu+ZWFueD2wW2C/TwJAHcuD+o+Fh2zkN2U29+eM/", - "gaCRq7Da70GEhYiMCU2JT1OqJrg6yH3w9H76/wAAAP//Wu9Psq0/AAA=", + "H4sIAAAAAAAC/+xbWXPbOBL+KyjuPlKSLV8VvzmOs3FtJvHas0ntTrk0INkkEZEAA4CylZT++xQOXiJ1", + "21YylafEEAg2ur9ufN0Nfnd8lmaMApXCOf/uCD+GFOv/XkQRhwhLCG5B5IlUYwEIn5NMEkadc8eMIxYi", + "TNHVI/i5+gHdwtcchHRcJ+MsAy4J6AVDrn6g/rS90tviJ7WYjIlA3KyNU0YjhJMEURaAQDLGEoF+FQRI", + "xoB4+TZ4xGmWgHN+0D89dR05zcA5d2ieesAd13nsRaxnB8OEYXl6XB/tiTHJekxLhJNexgiVwJ1zyXOY", + "uU4GwEVb8PfEy4YZun4jjOSAPlRyRkzWN1MX8Q/ncPjm6N+Mfb7Nji4+jc++Sn94MTl9JF+ji2/48P8s", + "H4v/4P/5d0N/8uHV8fjd3SXDjrvNY55z7zpEQqrltxoQkhMaObNST5hzPN1AIbwExT85hM65849BBaWB", + "xdGgRIXF0Kx6IfO+gC/nDIML0PVvC51VApE0Y1y/MsMyds6diMg49/o+SwdeAkJQkA+MjwfemRgovAzK", + "5dRG193ZPPA7zS407nNKvuZg7VtCoMsVSv0v01bL5ZaYp0NZYi/akpITL5dwISUIybq8RKmBcEAiA5+E", + "xEe4mIuwQBOW+7HyrvmAAdiPO13uZniDbgB44XdqIkoxDbBkfFquXlf7/jzvqRyOURixcC19VOp9iIED", + "ejBhUpkAS5QAVuil8HcKSCviij0y+h1o3dpnUhZAIgZ26U185jWWflwLjSZIqLc28I95lKfFydw0+uX7", + "a1T+jELGdeB5rUREb3Pq62l18/7h9HqYR47rhIQL2SN0wnyspvWKdRzXYTIGXi3c7/ede7d6VIDPaLDd", + "szImfJNH6xBZEyvb+5aENEtU4FsRoH+381q2U2uwjPhtS11TbRrhA8WcsMIjGR8D15ZLkci9iLM8Ey6a", + "shz5mCKJeQQS4SpkFpOQN7WDU0IjRKRAJAAqSUg0zWmqaAP3sREiULGg1IdbA+H9vIt1IFlkjApoQ9my", + "tBEJOjT0ZtnBWULY8U5Dz/NPoHcYHJ72jgG/6nknJ2e9k8PwGJ9i7+T0xN9FAas3ZwlPc2t+nNOx8dcg", + 
"IGbxm8aMZYi6VE8Xx/fM7WTYJbGAUj92QEtYKMutiLE3RQEJQ+BApWHOGziDzwJoW+lOYpmXknj6xaU8", + "DTsNDw62NoPrpCAEjjoEuK5vuYUUFGKSQOAawm0XQSmJYoliPAGUMg6I0JAh7LFcGn1yzvgOsv68qDbA", + "qp08TeH/BdImV7UMb4Hm3Sr+aODVkjJ0/aZF6pbrasE7XlRj9UBIgnbYWx8fDeduBQ6VUi7ThfodPcSs", + "nu8S0VDFlpxte7yXW1kU6+zSnbFOG7dgJ60MZ0PtriB83B5FfZWPN+ywK98rVta5bInUS0ZDEnUQNT2e", + "c2zCth6uCNvqogkuCOvKk0Rt9KKarQM5FUBFLkY4iRgnMk7bAn6OiR+jcioqpyIRszwJkAeapUBgpSZi", + "QdzPvFDuAC6gk9EEd6WPV3RCOKOKhaAJ5gQr0ywmvWul2h9wCsEnnOSwA200taURC0fmjG1J/kFPUA5d", + "K19ZvVqvXkHcD0vh1KujZiVrZc2Kp0QIhbi2aDfVj204dmbOTixlJs4HA5yRvh1VnuK4T1xUGhWFCC3p", + "cjsar76oPaCWyakk6Uo2f2umWdeduY6QAaGd1IcGmAfomma5XCPbWvuJbX1FxhxEzJKOs/SG8TpDbOPO", + "xK8APRAZI1wc8ZLpGEACmPdxJHLfByHCPOkGZrvCukp6kgLLO2jHO/aAEkajQtQm55V4DM7W7rBmgcCC", + "YQ9FgdL5bjDHKUhDDpqnwUQHrFYivCW5Mavdr6eYSqp96uYHr5U8R6HdL5nFWoX2KpqFdm+d6cllxTPD", + "rgjm4XDqAcHD48mx/w1PZPZlMvTZ0ZeTY3aMT77JIP/qZ9MpocC/RNR/PBNDMRyKM8A7JX8yZh3SqtO6", + "EPfzxd1vKCQJqJhVYKkuegxJwnoPjCdB/wGLdAd5sgL0FlYZBx9L5T1qQh0zpW86vZ6fkF6Y4OjQmbnV", + "uP63OVRNHbanDp3Z/ZpkpiN0dOGtkr/HAQut2f8Kw0D+LIH8JwoJJEF/k4D+Y5S+Kgg8VfpXd6ESnTsk", + "hGtUyvwkFzb2r2L7l3bqmjWbfVRrftVpNk+tN+kQiuUd1Q3g2Nnhv/BljhPEcpnlHWVIFyVkDKhkux/1", + "PLcauFKGc9HVI5HokgWAQPr9frvR90jkqBvC+lH1UxeKt1W2kAFwvoToa7mf+I2djHdOdU/3yjXprk2B", + "zNv3wewKAnZNhcRJspDf+T8PiVl8GuKf6ix0nZyTZgXgqc5Vf7e6ags0Cw9TG1Oe5rib7S7xNnX35624", + "/11q7fMa/sWvfvGrffOrd4ATGRuUdCS1itUI86P7owauWp28zQ7RGKY9nayiDBPe2gXF6dwu9Mj2Vizr", + "X9WKZuiJQpAVb6PC2BWdfMJ7qYrNdXza9il/MwXgWpCiETLwM4VWFRm6Wk+g4Kny30pJLf8WiEhEwVeh", + "hk+rN+n1q/YVwhzsVT/TSfKm6nCr3dUrbRriRECpfI+xBDDdACW4fhNxqbe3b4MVIDMaSJKPoS7urFZs", + "S50z9/u6Eq/fqrrf+KKb2BcyL6uDdp4IK7prGqLVGWTP5bleWaP1Xbvr3URqirvaNvZiZHUA3XCSKoiq", + "9cs6cMEH9thWX3y33Mg/d7ecUKONSvJ933Ld6jH/6S7HrtvbKRX24g7RblO2iClQ7CX15s7mga9Zrt60", + "/f39mUsNLRXsww6Nlu+GtzVMy8gu0QpCAXh5NFLMeicbBpxMgIsRZ0yOjC6+73CnQvLpXBP/6bojYQ5J", + "TbrNLygkLIrMCbF9fpQy3vFB0W96HCUkJbL73srWQvOcjorbBT9c+/r1+7smxPfgYwsvSXeUZ361NvfT", + 
"2nzWPpdaHR4lcIqTN8zvoDZvCQ2QYn66GmFI4N0DjoxH5Dyxt43OBwNhhvuEKcMX4bW53O/KuYlAr8/u", + "0DvAgSHjd8AnwJGHBQSImV7kxwzoxc01OuoflDVVHez7SpVEai2rZfQKtyAkUtN79QdVgghcmFcf9I/7", + "r5RkLAOKM+KcO0f9g/6ROh6wjPXeBzgjg8nhoNBy5VXKI1hXIfDKXg7D7XsKyoG0yNdBNbH2uyXJr1kw", + "tT4mgepX4CxL7HYHX4ThIMbJNviMz37vMGvdVV8icgW1qhqjy3NaPcODg+cQtLyl2ZL0rrzQVDsRZq5z", + "bASZz1YmOCFBLZDz6puPk+4nDPSRMAA01TUlhshTlYAsV5bEkaj7pHB00rcQRAN9F39zKBUXwB6IytMV", + "39b15vIezQqk6cu8zwS37u+jNsCc+T7hRZG34EuYnwt+BSbM5x1zcNgOmIPqQ+FufHb0PJZ8BtDEZOcX", + "Os+IyWb7ZtY8Rl8cYeaD6ha+5pVnFctBcgITCFZi7SkQtplV14YWMR2/xWiyLcHVB6ed+MwH54L2dofV", + "lgj+chBb1FhdLC+uxySE/TFlDwkEkYJZAxBL9re29e1rxDYxha4RTl4mknT3gl84mCxoly48Y3+kQEK3", + "iSGx7sUpGSLoQMxlDP7YsH47cx4c74rhZ7NJo13YYQktHRFWwOmcorp2UOjEDtzrRe1giydOgE9lTGhk", + "E7Kmn5oPz9ZO6hppnDgfmAJDv6gwBMwXA/uHgofpFNZsN3Pnl/8EnIS2eG/2gzANEJ5gkmCPJEROnXIh", + "u+HZ/eyvAAAA//9aHlaO4kYAAA==", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/cmd/node/main.go b/cmd/node/main.go index 2a7fde66..b3471fd4 100644 --- a/cmd/node/main.go +++ b/cmd/node/main.go @@ -24,9 +24,9 @@ import ( b7slog "github.com/blessnetwork/b7s/log" "github.com/blessnetwork/b7s/models/bls" "github.com/blessnetwork/b7s/node" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" - "github.com/blessnetwork/b7s/store/traceable" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" + "github.com/blessnetwork/b7s/stores/store/traceable" "github.com/blessnetwork/b7s/telemetry" ) @@ -247,7 +247,7 @@ func run() int { } case bls.HeadNode: - node, err = createHeadNode(core, cfg) + node, err = createHeadNode(ctx, core, cfg) } if err != nil { log.Error().Err(err).Msg("could not create node") diff --git a/cmd/node/metrics.go b/cmd/node/metrics.go index b9faf6fa..35cb4f38 100644 --- a/cmd/node/metrics.go +++ b/cmd/node/metrics.go @@ -3,7 +3,7 @@ package main import ( "slices" - mp 
"github.com/armon/go-metrics/prometheus" + mp "github.com/hashicorp/go-metrics/prometheus" "github.com/blessnetwork/b7s/consensus/pbft" "github.com/blessnetwork/b7s/consensus/raft" diff --git a/cmd/node/node.go b/cmd/node/node.go index 25c0befd..52b4d5a4 100644 --- a/cmd/node/node.go +++ b/cmd/node/node.go @@ -9,9 +9,11 @@ import ( "github.com/blessnetwork/b7s/executor/limits" "github.com/blessnetwork/b7s/fstore" "github.com/blessnetwork/b7s/models/bls" + b7smongo "github.com/blessnetwork/b7s/mongo" "github.com/blessnetwork/b7s/node" "github.com/blessnetwork/b7s/node/head" "github.com/blessnetwork/b7s/node/worker" + "github.com/blessnetwork/b7s/stores/batch-store/mbs" ) type Node interface { @@ -63,9 +65,43 @@ func createWorkerNode(core node.Core, store bls.Store, cfg *config.Config) (Node return worker, shutdown, nil } -func createHeadNode(core node.Core, cfg *config.Config) (Node, error) { +func createHeadNode(ctx context.Context, core node.Core, cfg *config.Config) (Node, error) { - head, err := head.New(core) + var opts []head.Option + + batchServer := cfg.Head.Batch.Server + if batchServer != "" { + + log.Info(). + Str("server", batchServer). + Str("db_name", cfg.Head.Batch.DBName). + Msg("initializing mongo batch server") + + cli, err := b7smongo.Connect(ctx, batchServer) + if err != nil { + return nil, fmt.Errorf("could not connect to batch server: %w", err) + } + + bs, err := mbs.NewBatchStore(cli, mbs.DBName(cfg.Head.Batch.DBName)) + if err != nil { + return nil, fmt.Errorf("could not create batch store: %w", err) + } + + err = bs.Init(ctx) + if err != nil { + return nil, fmt.Errorf("could not initialize batch store: %w", err) + } + + log.Info().Msg("initialized mongo batch server") + + opts = append(opts, head.BatchStore(bs)) + } + + if cfg.Head.Batch.RequeueInterval > 0 { + opts = append(opts, head.BatchRequeueInterval(cfg.Head.Batch.RequeueInterval)) + } + + head, err := head.New(core, opts...) 
if err != nil { return nil, fmt.Errorf("could not create a head node: %w", err) } diff --git a/config/config.go b/config/config.go index bce16e51..1d117a18 100644 --- a/config/config.go +++ b/config/config.go @@ -76,6 +76,13 @@ type Connectivity struct { type Head struct { RestAPI string `koanf:"rest-api" flag:"rest-api"` + Batch Batch `koanf:"batch"` +} + +type Batch struct { + Server string `koanf:"server" flag:"batch-db-server"` + DBName string `koanf:"db-name" flag:"batch-db-name"` + RequeueInterval time.Duration `koanf:"requeue-interval" flag:"batch-requeue-interval"` } type Worker struct { @@ -184,6 +191,12 @@ func getFlagDescription(flag string) string { return "tracing exporter HTTP endpoint" case "prometheus-address": return "address where prometheus metrics will be served" + case "batch-db-server": + return "mongodb server to use for persisting batch data" + case "batch-db-name": + return "database name to use for persisting batch data" + case "batch-requeue-interval": + return "interval at which batch requests should be checked or requeued" default: return "" } diff --git a/config/flags.go b/config/flags.go index b091c4a5..db5c01c3 100644 --- a/config/flags.go +++ b/config/flags.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "strings" + "time" "github.com/knadh/koanf/providers/structs" "github.com/spf13/pflag" @@ -54,6 +55,9 @@ func addFlag(fs *pflag.FlagSet, fc CLIFlag) error { case []string: fs.StringSliceP(fc.Flag, fc.Shorthand, nil, fc.Description) + case time.Duration: + fs.DurationP(fc.Flag, fc.Shorthand, def, fc.Description) + default: return errors.New("unsupported type for a CLI flag. 
Extend support by adding handling for the new flag type") } diff --git a/config/load.go b/config/load.go index abc6774a..33edcbdf 100644 --- a/config/load.go +++ b/config/load.go @@ -72,6 +72,11 @@ func load(args []string) (*Config, error) { return nil, fmt.Errorf("could not unmarshal konfig: %w", err) } + err = cfg.valid() + if err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + return &cfg, nil } diff --git a/config/valid.go b/config/valid.go new file mode 100644 index 00000000..df014b64 --- /dev/null +++ b/config/valid.go @@ -0,0 +1,14 @@ +package config + +import ( + "errors" +) + +func (c *Config) valid() error { + + if (c.Head.Batch.Server != "") != (c.Head.Batch.DBName != "") { + return errors.New("batch db server and database name have to be either set or unset") + } + + return nil +} diff --git a/consensus/pbft/execute.go b/consensus/pbft/execute.go index e5226120..14fd89a9 100644 --- a/consensus/pbft/execute.go +++ b/consensus/pbft/execute.go @@ -5,7 +5,7 @@ import ( "fmt" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "github.com/blessnetwork/b7s/models/bls" diff --git a/consensus/pbft/params.go b/consensus/pbft/params.go index 3e545b38..aa353608 100644 --- a/consensus/pbft/params.go +++ b/consensus/pbft/params.go @@ -4,7 +4,7 @@ import ( "errors" "time" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" "github.com/libp2p/go-libp2p/core/protocol" ) diff --git a/consensus/pbft/pbft.go b/consensus/pbft/pbft.go index 5d209749..7adc0dd1 100644 --- a/consensus/pbft/pbft.go +++ b/consensus/pbft/pbft.go @@ -10,7 +10,7 @@ import ( "strings" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/rs/zerolog" otelcodes "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" diff --git a/consensus/raft/fsm.go b/consensus/raft/fsm.go index 9cdd30e7..d8787135 100644 --- a/consensus/raft/fsm.go 
+++ b/consensus/raft/fsm.go @@ -7,7 +7,7 @@ import ( "io" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/hashicorp/raft" "github.com/libp2p/go-libp2p/core/peer" "github.com/rs/zerolog" diff --git a/consensus/raft/params.go b/consensus/raft/params.go index 05e6a8e4..9d7f5094 100644 --- a/consensus/raft/params.go +++ b/consensus/raft/params.go @@ -3,7 +3,7 @@ package raft import ( "time" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) // Raft and consensus related parameters. diff --git a/docs/batch_execution.md b/docs/batch_execution.md new file mode 100644 index 00000000..5985946d --- /dev/null +++ b/docs/batch_execution.md @@ -0,0 +1,540 @@ +## Batch Execution + +Batch execution request allows specifying many execution requests in a single call. +Batch execution request specifies execution of a single Bless function with many different argument lists. + +## Batch Execution Flow + +A Batch Execution Request consists of a template and a list of arguments for each individual execution. +Template resembles an ordinary execution request - it specifies the Bless function to be executed and the configuration. +Arguments are specified in a list of argument lists. + +If we have a Batch Execution Request which will produce 100 executions - we will have 1 template and a list with 100 argument lists. + +The input Batch Execution Request is termed a `Batch` in the head node. +Each combination of a template and an argument list is a `Work Item`. +Thus, a Batch which specifies 100 individual executions is a Batch consisting of 100 Work Items. + +When a Batch Execution Request is received, the head node does a roll call. +The number of workers it waits for is the number specified in the input request. + +After enough workers have responded, head node will split the work items into `Chunks`. +Number of chunks is equal to the number of workers that have been chosen for the work.
+ +To recap - `Batch` consists of individual `Work Items`. +After workers have been chosen, we have a `Batch` split into `Chunks`, where each chunk has a worker it was assigned to, and each `Chunk` consists of `Work Items`. + +Currently Work Items are assigned to Chunks/workers in a round-robin manner. +One Work Item is only assigned to a single Chunk. + +Chunks are then sent to chosen workers for execution. + +When a Batch is created, a handle - UUID - is returned to the user. +The user can use this handle to query for the current status/progress of the execution. + +As workers are completing their chunks, they respond to the head node with their results. +Head node processes these messages and updates corresponding work items in the DB. + +For each work item, we store the standard output of the execution, and the status of the execution - pass/fail. + +When processing a result for a chunk, we do some validation: +- was the sending node indeed the node that was assigned this chunk +- was the work item whose result we're processing indeed part of the chunk in question +- was the work item really in the "in progress" state - to prevent overwriting already executed items + +When we process a chunk result, we do additional checks and, if all of the items in a chunk have been processed, we mark the chunk as `DONE`. + +### Head Node Boot Sequence + +After a head node boots, it should restart any past executions. + +When the head node main loop starts, it will wait for a short delay so that connections to workers can be re-established. +This delay is one minute, by default. + +After this, it will try to resume any batches that have not been completed. +First, we will query the Batch Store for any Batches in the state `CREATED` or `IN PROGRESS`. + +Then, it will iterate through the list of batches and attempt to resume them. + +Batch resume involves finding all work items belonging to the batch that are in the `CREATED` or `FAILED` state.
+You can read more about this in the [Work Item States](#work-item-states) section. + +For all `FAILED` work items, we check if they have passed the failure threshold, and, if so, we now mark them as `PERMANENTLY FAILED`. +For all other work items, we start another round of batch execution - which includes doing a roll call to find a new set of workers. +We will create new chunks and assign these work items to them, and send them to workers for execution. + +This process can repeat until all work items are either `DONE` or `PERMANENTLY FAILED`. +At this point, Batch is marked as complete. + +This process is also done at regular intervals, controlled by the _Requeue Interval_ config option. +By default this interval is 1 hour. + +One important thing to note is - batch is considered complete only when a resume is attempted. +This will happen either on node boot or on next requeue interval. +This is done so we do less frequent querying. + +### Batch Execution Result Format + +When returning Batch Execution result, we return a list (actually a map) of individual chunks comprising that batch. +Chunk IDs are v4 UUIDs and have no meaning. + +For each Chunk, we include the ID of the peer that performed that execution and the list of results. +Results of Work items in a chunk are presented in a map, mapping work item ID to the result of the execution. + +### Work Item ID + +We must provide a way to map one specific instance of execution to its result. +For example, suppose we want to execute the function found at CID `c`, specifically `f.wasm`, with two sets of arguments: `--input-arg1 a1 --input-arg2 a2` and `--input-arg1 b1 --input-arg2 b2`. +How do we find out which result belongs to which? + +For this, we introduced a work item ID which can be derived from its inputs. +This way, by knowing the execution inputs, you can derive the ID that you need to look up in the set of results. + +Work Item ID is calculated as an MD5 checksum of the function invocation, presented as `c/f.wasm <arguments>`.
+For the first execution above, that would mean `c/f.wasm --input-arg1 a1 --input-arg2 a2`. + +For a real world example, `bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm https://example.com/dir1/dir2/resource/some-random-slug-1` produces an MD5 hash of `268a4145a50ade48aed2b1147d3518c6`. +Thus, when iterating over the results of a chunk, we only need to be on the lookout for this specific key to find our results. + +## Stores + +Batch Execution requests can consist of many individual Work items and their execution can take a long time. +To handle this, the head node persists this information to a `Batch Store`. + +### MBS - Mongo Batch Store + +Mongo Batch Store will use an external MongoDB server as a backing store. +This allows the head node to save information to MongoDB and resume work even after it has restarted. + +The downside is that there needs to be an externally configured (and reachable) MongoDB server that the head node can use. + +When a Mongo Batch Store is initialized, it will try to connect and validate connection to a MongoDB server (by issuing a Ping request). +If the connection is successful, it will proceed to create/configure the collections we will use. +By default, Mongo Batch Store will try to create the following collections: + +- `b7s-batches` +- `b7s-batch-chunks` +- `b7s-batch-work-items` + +We have created schemas for these collections, to ensure proper validation. +Schema files can be found [here](stores/batch-store/mbs/validation). + +If the collections already exist with the same configuration, the attempt to create the collections again is a no-op. +An attempt to create a collection with a different schema than the one that already exists will fail. +However, because of how the MongoDB Go driver operates, identical schemas can be encoded differently - e.g. different order of fields. +This may lead to errors that are false positives.
+ +As we assume some exclusivity on the MongoDB server, we ignore these specific errors as we assume the cause for them is the one listed above. + +To specify a MongoDB server we can use command-line arguments: + +```console +$ ./node --batch-db-server mongodb://172.26.176.1:27017/ --batch-db-name b7s-db # other CLI arguments +``` + +Alternatively, MongoDB server can be specified using the standard head node config file: + +```yaml +head: + batch: + server: mongodb://172.26.176.1:27017/ + db-name: b7s-db +``` + +### IBS - In-Memory Batch Store + +MongoDB Batch Store relies on having an externally configured and reachable MongoDB server. +This is a hindrance to users wanting to run a head node without external dependencies. + +In order to support this, we include an In-Memory Batch Store. +This Batch Store keeps all data in working memory of the head node. + +Obviously, this Batch Store does not survive head node restarts and all content is gone after the head node has stopped. + +### Work Item Database ID + +Function invocation does not have to be universally unique. +Many users can execute the same Bless function with the same arguments. + +In order to avoid collisions, individual work items are stored in the DB using the `<batch-id>/<work-item-id>` format. + +You can read more about the work item ID format [here](#work-item-id). + +### Work Item States + +A Work Item can be in one of several states during its lifetime. + +- `0` - `CREATED` - Work item was saved but has not yet been assigned to a chunk, and its execution has not started +- `1` - `IN PROGRESS` - Work Item was created and sent to a worker for execution +- `100` - `DONE` - Work item was executed by a Worker as part of a chunk. The execution resulted in a successful exit code (0). +- `-1` - `FAILED` - Work item was executed by a Worker as part of a chunk. The execution resulted in a non-zero exit code.
+- `-2` - `PERMANENTLY FAILED` - Work Item was executed by a Worker or Workers multiple times, after which we will no longer retry execution. + +Execution of a Work item can fail for various reasons. +For example, an HTTP call could fail because the IP of a Worker was added to a blocklist by the target server, or a resource was not available. +In that case it makes sense to retry that execution, potentially by a different Worker. +However, we don't want to retry execution indefinitely. + +We allow the user to specify a `max_attempts` field, which is an unsigned number specifying how many times will we try to execute a single Work item. +After this threshold has been reached, we give up on the individual work item. + +Note that a user can be willing to specify an arbitrarily large limit for Work items. +From the perspective of both head and worker nodes, this means many retries and potentially wasted resources. + +To prevent this, we allow the head node operator to specify a global limit on number of retries. +When we consider if a work item should be retried, we use the _lower_ of the two limits. + +By default the Head node will give up after 10 failed attempts. + + +### Batch Execution REST API Call + +Below is a curl invocation starting a batch execution request for a function `bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q`, which requires 4 nodes, and a list of four lists of CLI arguments for the Bless function. +Note that in this case each argument list consists of a single argument (a URL) but that does not have to be the case. 
+ +```sh +curl --location '127.0.0.1:8080/api/v1/functions/execute/batch' \ +--header 'Content-Type: application/json' \ +--data '{ + "template": { + "function_id": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q", + "method": "echo.wasm", + "config": { + "number_of_nodes": 4 + } + }, + "arguments": [ + ["https://example.com/dir1/dir2/resource/some-random-slug-0"], + ["https://example.com/dir1/dir2/resource/some-random-slug-1"], + ["https://example.com/dir1/dir2/resource/some-random-slug-2"], + ["https://example.com/dir1/dir2/resource/some-random-slug-3"] + ], + "max_attempts": 2 +}' +``` + +As a result of this call, a batch execution request will be _started_. +The batch execution request is _not_ synchronous. + +Example output: + +```json +{ + "request_id":"a642f006-6631-4d96-9f84-171c5f96963e" +} +``` + +### Batch Execution Result REST API Call + +Below is an example API request to get the result of a previously started Batch Execution. + +```sh +curl --location '127.0.0.1:8080/api/v1/functions/execute/batch/result' \ +--header 'Content-Type: application/json' \ +--data '{ + "id": "d949d4e9-6419-4ad2-87c5-3a8deb33ee61" +}' +``` + +Example output: + +```json +{ + "chunks": { + "5302720e-d689-4463-adcb-8afb9c615135": { + "peer": "12D3KooWNbW985igznpKwHAf1pxByyNeipQFtEFLynNrWhsEHkL9", + "results": { + "1f33048c02455bb49807ae58e2ccccca": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-13", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-13" + ] + }, + "268a4145a50ade48aed2b1147d3518c6": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-1", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-1" + 
] + }, + "982931a535ee64f91fc822b5a0d3a555": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-9", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-9" + ] + }, + "9954818207fe952736f370daf453f264": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-5", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-5" + ] + }, + "fd37bb6de5f0b9daafdec0820a6fd349": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-17", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-17" + ] + } + } + }, + "7397334c-54b6-4a15-8fab-824ec006a50e": { + "peer": "12D3KooWH7RrtuB1YgXFB4piS776eyRrPfK34y2wL8tkTEwNoQfp", + "results": { + "2da1965d6a1239fa71e98fdab897ff8d": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-3", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-3" + ] + }, + "ad69732dcf1756a2391fca4e8fd5c601": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-11", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-11" + ] + }, + "b7396904551260cbc63ad6b6bf098bcf": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-19", + "exit_code": 0 + }, + "function_invocation": 
"bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-19" + ] + }, + "dbcb6ceb8e7a7c1e78743b8fb7629234": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-7", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-7" + ] + }, + "fbbfa810e9c16122262f600f548594aa": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-15", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-15" + ] + } + } + }, + "b9ce3ba3-004d-40b4-80f9-0cbf505d60f5": { + "peer": "12D3KooWMTteLBk8fUtFVyh6FMFwKroGwek15fDc5bApdDxeVWTd", + "results": { + "2efac038c9d489a6f8057455b8cd9773": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-16", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-16" + ] + }, + "4c555cef30403a7a11049c2883114da4": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-0", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-0" + ] + }, + "52347f161caec8ccea34f1308d4ab3ab": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-4", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-4" + ] + }, + 
"becb5828881f32bce44384c5c39b601b": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-8", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-8" + ] + }, + "d5255aff17d6b4358e917fcf8ecc11b2": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-12", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-12" + ] + } + } + }, + "e6ff1281-f10d-496b-895a-8056d4565a35": { + "peer": "12D3KooWJHs23DwVANCvKHRrZwxWbLXn4gxBhZgsjh8B8VfiiwbZ", + "results": { + "42ec3ed349e3e3d029ddde64c7899c05": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-6", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-6" + ] + }, + "8c7354c2a28bd99e0eef701234c7406e": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-2", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-2" + ] + }, + "bbc6ceac22629ebcc3f2f5b0295360c0": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-18", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-18" + ] + }, + "cc10dad585fb81bbb8822d030434d469": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-14", + "exit_code": 0 + }, + "function_invocation": 
"bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-14" + ] + }, + "e2a6032841c31e9d1dc5e73350d721ae": { + "result": { + "stdout": "https://example.com/dir1/dir2/resource/some-random-slug-10", + "exit_code": 0 + }, + "function_invocation": "bafybeie3nlygbnuxhvqv3gvwa2hmd4tcfzk5jtvscwl6qs3ljn5tknlt4q/echo.wasm", + "arguments": [ + "https://example.com/dir1/dir2/resource/some-random-slug-10" + ] + } + } + } + }, + "code": "200", + "request_id": "cb24c8cd-9c67-4bfc-8c87-2ce2ef4d8f10" +} +``` + +## Notes and Ideas for Improvements + +### MongoDB ID types + +We do not use the native MongoDB ID type for IDs for our records - batches, chunks or work items. +Instead, we use UUID v4 converted to strings. + +This will be less performant and we can look into a different solution. + +### MongoDB Indexes + +Currently we do not have any indexes on MongoDB collections. + +### Records in MongoDB queries are not split into parts + +Currently we do not do any chunking when working with MongoDB - we do everything in a single query. +In case of inserting or updating very large number of work items, this may prove problematic and we split the input records into parts. + +### Batches Are Resumed Sequentially + +Because we're not sure on the number of available workers and other variables, when head node resumes batch execution, it will do this one by one. + +### Work Items are Executed Sequentially by Workers + +We could allow a degree of parallelism for Work Item processing. +However, in the scenario of web scraping e.g. 1000 URLs on a single domain, processing everything in parallel would likely lead to blocking of the offending worker. +For now, the safest option was to do this sequentially. +However, even now there is a risk of having too large rate of requests and still leading to bans. 
+ +### In-Progress Work Items + +At the moment, a Work item could be assigned to a worker node and will be marked as `IN PROGRESS`. +A node could fail or something similar could happen and the Work item could remain stuck in this state. + +We could (should) consider a mechanism to reset the state of the work item, to make it eligible for assignment to a different node. +This could be something like a time period after which a work item that has not been completed is reset. +Alternatively (ties into the next point) we could ping the worker for the result and, if none is provided, reset the state of the work item then. + +### Work Item Result Polling + +Right now the worker sends the results to the head node after it has completed its chunk. +However, if a head node goes down temporarily, it might not receive them. + +We might end up in a state where + +1. Worker node is done with the execution, and +2. Head node did not receive the result, thus failed to persist it and update the DB. + +We could introduce polling from the Head node to the worker node, effectively asking "do you have results of chunk `X` for me?". +If yes, it could update the information in the DB. +If not, we would need to differentiate between still in progress work items, and those that were "lost". +Worker nodes do not persist the information about work items they were assigned, so after a restart the worker would not be aware of the fact that something was expected from it. + +### Number of Nodes Forced also on Batch Resume + +For Batch Execution Request, we respect the number of nodes the user specified. +For example, the user might send a Batch with 100 work items and request 10 nodes to process them. +These will be split into 10 chunks of 10 work items each. +Let's say one worker fails - we will have 90 `DONE` work items and 10 `FAILED`. + +On Batch resume, we will pick up 10 non-complete work items, and resume batch execution. +However, we will again honor the number of nodes specified.
+This means we will again request 10 nodes to process these 10 items. + +We could be happy with a smaller number of workers and not force the original one. + +### Multiple Identical Work Items in a Batch + +Because of the way we calculate [work item IDs](#work-item-id), multiple work items in a single batch that are identical will produce identical IDs. +This might lead to collisions. +However, it did not seem like a good idea to manually validate uniqueness as batches could be quite large as it could be CPU intensive. diff --git a/executor/config.go b/executor/config.go index 677339af..abf6b360 100644 --- a/executor/config.go +++ b/executor/config.go @@ -1,7 +1,7 @@ package executor import ( - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/spf13/afero" "github.com/blessnetwork/b7s/models/bls" diff --git a/executor/execute_function.go b/executor/execute_function.go index 6244d6b2..e0850a7d 100644 --- a/executor/execute_function.go +++ b/executor/execute_function.go @@ -5,7 +5,7 @@ import ( "fmt" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "go.opentelemetry.io/otel/trace" "github.com/blessnetwork/b7s/models/codes" diff --git a/executor/executor.go b/executor/executor.go index 091a454e..b9ae6a7b 100644 --- a/executor/executor.go +++ b/executor/executor.go @@ -6,7 +6,7 @@ import ( "fmt" "path/filepath" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/rs/zerolog" "github.com/blessnetwork/b7s/telemetry/tracing" diff --git a/executor/params.go b/executor/params.go index 999a7085..c4685dcd 100644 --- a/executor/params.go +++ b/executor/params.go @@ -3,7 +3,7 @@ package executor import ( "os" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) const ( diff --git a/fstore/fstore.go b/fstore/fstore.go index 7861fb77..c8408ace 100644 --- a/fstore/fstore.go +++ b/fstore/fstore.go @@ -4,8 +4,8 @@ import ( "net/http" "sync" - 
"github.com/armon/go-metrics" "github.com/cavaliergopher/grab/v3" + "github.com/hashicorp/go-metrics" "github.com/rs/zerolog" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" diff --git a/fstore/fstore_integration_test.go b/fstore/fstore_integration_test.go index ee287666..d012de98 100644 --- a/fstore/fstore_integration_test.go +++ b/fstore/fstore_integration_test.go @@ -18,8 +18,8 @@ import ( "github.com/blessnetwork/b7s/fstore" "github.com/blessnetwork/b7s/models/bls" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" "github.com/blessnetwork/b7s/testing/helpers" "github.com/blessnetwork/b7s/testing/mocks" ) diff --git a/fstore/http_internal_test.go b/fstore/http_internal_test.go index c5b4af15..7d3c1346 100644 --- a/fstore/http_internal_test.go +++ b/fstore/http_internal_test.go @@ -15,8 +15,8 @@ import ( "github.com/stretchr/testify/require" "github.com/blessnetwork/b7s/models/bls" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" "github.com/blessnetwork/b7s/testing/helpers" "github.com/blessnetwork/b7s/testing/mocks" ) diff --git a/fstore/install_test.go b/fstore/install_test.go index 37225f2a..2f50dd02 100644 --- a/fstore/install_test.go +++ b/fstore/install_test.go @@ -17,8 +17,8 @@ import ( "github.com/blessnetwork/b7s/fstore" "github.com/blessnetwork/b7s/models/bls" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" "github.com/blessnetwork/b7s/testing/helpers" "github.com/blessnetwork/b7s/testing/mocks" ) diff --git a/fstore/params.go b/fstore/params.go index 75eacde5..251866b8 100644 --- a/fstore/params.go +++ b/fstore/params.go @@ -3,7 
+3,7 @@ package fstore import ( "time" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) const ( diff --git a/go.mod b/go.mod index 783cf010..6807208a 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,10 @@ module github.com/blessnetwork/b7s -go 1.23.2 +go 1.24.4 require ( github.com/Microsoft/go-winio v0.6.1 github.com/a-h/templ v0.3.819 - github.com/armon/go-metrics v0.4.1 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 github.com/blocklessnetwork/b7s-attributes v0.0.0 github.com/cavaliergopher/grab/v3 v3.0.1 @@ -18,6 +17,7 @@ require ( github.com/go-logr/zerologr v1.2.3 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/hashicorp/go-hclog v1.6.3 + github.com/hashicorp/go-metrics v0.5.4 github.com/hashicorp/raft v1.7.1 github.com/hashicorp/raft-boltdb/v2 v2.3.0 github.com/ipfs/boxo v0.24.0 @@ -38,6 +38,7 @@ require ( github.com/spf13/afero v1.11.0 github.com/stretchr/testify v1.9.0 github.com/ziflex/lecho/v3 v3.7.0 + go.mongodb.org/mongo-driver/v2 v2.2.2 go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.55.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 go.opentelemetry.io/otel v1.31.0 @@ -50,6 +51,7 @@ require ( ) require ( + github.com/armon/go-metrics v0.4.1 // indirect github.com/boltdb/bolt v1.3.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cilium/ebpf v0.16.0 // indirect @@ -114,13 +116,17 @@ require ( github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/wlynxg/anet v0.0.5 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect go.etcd.io/bbolt v1.3.11 // indirect go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect 
go.uber.org/dig v1.18.0 // indirect go.uber.org/fx v1.23.0 // indirect go.uber.org/mock v0.5.0 // indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/text v0.22.0 // indirect golang.org/x/time v0.7.0 // indirect gonum.org/v1/gonum v0.15.1 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect @@ -147,7 +153,7 @@ require ( github.com/francoispqt/gojay v1.2.13 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.3 // indirect - github.com/golang/snappy v0.0.4 // indirect + github.com/golang/snappy v1.0.0 // indirect github.com/google/gopacket v1.1.19 // indirect github.com/google/uuid v1.6.0 github.com/hashicorp/errwrap v1.1.0 // indirect @@ -210,12 +216,12 @@ require ( go.opencensus.io v0.24.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.31.0 // indirect + golang.org/x/crypto v0.33.0 // indirect golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect golang.org/x/mod v0.22.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sync v0.10.0 - golang.org/x/sys v0.28.0 + golang.org/x/sync v0.11.0 + golang.org/x/sys v0.30.0 golang.org/x/tools v0.28.0 // indirect google.golang.org/protobuf v1.35.1 // indirect lukechampine.com/blake3 v1.3.0 // indirect diff --git a/go.sum b/go.sum index e4672f18..817add1d 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,7 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod 
h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= @@ -134,8 +135,10 @@ github.com/go-jose/go-jose/v4 v4.0.4 h1:VsjPI33J0SB9vQM6PLmNjoHqMQNGPiZ0rHL7Ni7Q github.com/go-jose/go-jose/v4 v4.0.4/go.mod h1:NKb5HO1EZccyMpiZNbdUw/14tiXNyUJh188dfnMCAfc= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -181,11 +184,12 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -194,6 +198,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= @@ -231,6 +237,8 @@ github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVH github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc= github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-metrics v0.5.4 
h1:8mmPiIJkTPPEbAiV97IxdAGNdRdaWwVap1BU6elejKY= +github.com/hashicorp/go-metrics v0.5.4/go.mod h1:CG5yz4NZ/AI/aQt9Ucm/vdBnbh7fvmv4lxZ350i+QQI= github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-msgpack/v2 v2.1.2 h1:4Ee8FTp834e+ewB71RDrQ0VKpyFdrKOjvYtnQ/ltVj0= @@ -286,14 +294,18 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM= github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod 
h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= @@ -315,6 +327,7 @@ github.com/knadh/koanf/providers/structs v0.1.0/go.mod h1:sw2YZ3txUcqA3Z27gPlmmB github.com/knadh/koanf/v2 v2.1.1 h1:/R8eXqasSTsmDCsAyYj+81Wteg8AqrV9CP6gvsTsOmM= github.com/knadh/koanf/v2 v2.1.1/go.mod h1:4mnTRbZCK+ALuBXHZMjDfG9y714L7TykVnZkXbMU3Es= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -445,6 +458,7 @@ github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOEL github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= github.com/onsi/ginkgo/v2 v2.22.1 h1:QW7tbJAUDyVDVOM5dFa7qaybo+CRfR7bemlQUN6Z8aM= @@ -519,6 +533,8 @@ github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP 
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= @@ -530,12 +546,16 @@ github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQy github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= 
+github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= @@ -582,6 +602,7 @@ github.com/shurcooL/users v0.0.0-20180125191416-49c67e49c537/go.mod h1:QJTqeLYED github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5kWdCj2z2KEozexVbfEZIWiTjhE0+UjmZgPqehw= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -634,6 +655,14 @@ github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h github.com/wlynxg/anet v0.0.3/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep 
v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -642,6 +671,8 @@ github.com/ziflex/lecho/v3 v3.7.0 h1:MSzYINEHtAaCx2XpbdF1A85aSyXitNJxF4T9dG6jzRQ github.com/ziflex/lecho/v3 v3.7.0/go.mod h1:LBlLsyIwa0MFxtJ2WU5WzHfuMR/jnq26TXddWfJ+s/0= go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= +go.mongodb.org/mongo-driver/v2 v2.2.2 h1:9cYuS3fl1Xhqwpfazso10V7BHQD58kCgtzhfAmJYz9c= +go.mongodb.org/mongo-driver/v2 v2.2.2/go.mod h1:qQkDMhCGWl3FN509DfdPd4GRBLU/41zqF/k8eTRceps= go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= @@ -698,8 +729,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= golang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 h1:1UoZQm6f0P/ZO0w1Ri+f+ifG/gXhegadRdwBIXEFWDo= golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= @@ -732,6 +763,7 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -757,11 +789,12 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync 
v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -773,16 +806,21 @@ golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -799,8 +837,8 @@ golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -813,12 +851,13 @@ golang.org/x/text 
v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= @@ -885,6 +924,7 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= @@ -899,6 +939,7 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/host/discovery.go b/host/discovery.go index 3573d210..9ac1ccb7 100644 --- a/host/discovery.go +++ b/host/discovery.go @@ -134,7 +134,6 @@ func (h *Host) connectToPeers(ctx context.Context, peers []bls.Peer) error { // Connect to the bootstrap nodes. var errGroup errgroup.Group for _, peer := range peers { - peer := peer // Should not happen other than misconfig, but we shouldn't dial self. 
if peer.ID == h.ID() { diff --git a/host/host.go b/host/host.go index b0832a83..4e50d814 100644 --- a/host/host.go +++ b/host/host.go @@ -5,8 +5,8 @@ import ( "fmt" "os" - "github.com/armon/go-metrics" "github.com/asaskevich/govalidator" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p" pubsub "github.com/libp2p/go-libp2p-pubsub" "github.com/libp2p/go-libp2p/core/crypto" diff --git a/host/params.go b/host/params.go index 77b1cc3c..6f3038bf 100644 --- a/host/params.go +++ b/host/params.go @@ -1,7 +1,7 @@ package host import ( - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) const ( diff --git a/host/publish.go b/host/publish.go index 4d886ba3..cb5fcc9e 100644 --- a/host/publish.go +++ b/host/publish.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" pubsub "github.com/libp2p/go-libp2p-pubsub" ) diff --git a/host/send.go b/host/send.go index 2e04b7ca..8667887a 100644 --- a/host/send.go +++ b/host/send.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" diff --git a/integration/node/node_integration_test.go b/integration/node/node_integration_test.go index 0b7f095e..cab553df 100644 --- a/integration/node/node_integration_test.go +++ b/integration/node/node_integration_test.go @@ -30,8 +30,8 @@ import ( "github.com/blessnetwork/b7s/node" "github.com/blessnetwork/b7s/node/head" "github.com/blessnetwork/b7s/node/worker" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" "github.com/blessnetwork/b7s/testing/helpers" "github.com/blessnetwork/b7s/testing/mocks" ) diff --git a/models/execute/request.go b/models/execute/request.go index 6dd741d2..7a1c2616 100644 --- 
a/models/execute/request.go +++ b/models/execute/request.go @@ -14,19 +14,27 @@ import ( // CID/method.wasm ... type RequestHash string -func GetExecutionID(r Request) RequestHash { +func (r Request) GetExecutionID() RequestHash { + return ExecutionID(r.FunctionID, r.Method, r.Arguments) +} + +func (r Request) FunctionInvocation() string { + return FunctionInvocation(r.FunctionID, r.Method) +} + +func ExecutionID(cid string, method string, arguments []string) RequestHash { // CID/method.wasm arg1 arg2 arg3 sum := md5.Sum( - fmt.Appendf([]byte{}, "%v %v", FunctionInvocation(r), strings.Join(r.Arguments, " ")), + fmt.Appendf([]byte{}, "%v %v", FunctionInvocation(cid, method), strings.Join(arguments, " ")), ) hex := hex.EncodeToString(sum[:]) return RequestHash(hex) } -func FunctionInvocation(r Request) string { - return r.FunctionID + "/" + r.Method +func FunctionInvocation(cid string, method string) string { + return cid + "/" + method } func (h RequestHash) String() string { diff --git a/models/execute/result.go b/models/execute/result.go index 00d8eb81..0e4b7af6 100644 --- a/models/execute/result.go +++ b/models/execute/result.go @@ -17,44 +17,43 @@ import ( type NodeResult struct { Result - Signature string `json:"signature,omitempty"` // Signed digest of the response. - PBFT PBFTResultInfo `json:"pbft,omitempty"` - Metadata any `json:"metadata,omitempty"` + Signature string `json:"signature,omitzero"` // Signed digest of the response. + PBFT PBFTResultInfo `json:"pbft,omitzero"` + Metadata any `json:"metadata,omitzero"` } // Result describes an execution result. type Result struct { - Code codes.Code `json:"code"` - Result RuntimeOutput `json:"result"` - Usage Usage `json:"usage,omitempty"` + Code codes.Code `json:"code,omitzero"` + Result RuntimeOutput `json:"result,omitzero"` + Usage Usage `json:"usage,omitzero"` } // Cluster represents the set of peers that executed the request. 
type Cluster struct { - Main peer.ID `json:"main,omitempty"` - Peers []peer.ID `json:"peers,omitempty"` + Main peer.ID `json:"main,omitzero"` + Peers []peer.ID `json:"peers,omitzero"` } // RuntimeOutput describes the output produced by the Bless Runtime during execution. type RuntimeOutput struct { - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` + Stdout string `json:"stdout,omitzero"` + Stderr string `json:"stderr,omitzero"` ExitCode int `json:"exit_code"` - Log string `json:"-"` } // Usage represents the resource usage information for a particular execution. type Usage struct { - WallClockTime time.Duration `json:"wall_clock_time,omitempty"` - CPUUserTime time.Duration `json:"cpu_user_time,omitempty"` - CPUSysTime time.Duration `json:"cpu_sys_time,omitempty"` - MemoryMaxKB int64 `json:"memory_max_kb,omitempty"` + WallClockTime time.Duration `json:"wall_clock_time,omitzero"` + CPUUserTime time.Duration `json:"cpu_user_time,omitzero"` + CPUSysTime time.Duration `json:"cpu_sys_time,omitzero"` + MemoryMaxKB int64 `json:"memory_max_kb,omitzero"` } type PBFTResultInfo struct { View *uint `json:"view,omitempty"` RequestTimestamp time.Time `json:"request_timestamp,omitzero"` - Replica peer.ID `json:"replica,omitempty"` + Replica peer.ID `json:"replica,omitzero"` } // ResultMap contains execution results from multiple peers. 
diff --git a/models/request/execute_batch.go b/models/request/execute_batch.go index 0dd99037..36462a07 100644 --- a/models/request/execute_batch.go +++ b/models/request/execute_batch.go @@ -12,17 +12,16 @@ import ( type ExecuteBatch struct { bls.BaseMessage - Topic string `json:"topic,omitempty"` - Template ExecutionRequestTemplate `json:"template,omitempty"` - Arguments [][]string `json:"arguments,omitempty"` - WorkerConcurrencyLimit uint `json:"worker_concurrency_limit,omitempty"` + Topic string `json:"topic,omitempty"` + Template ExecutionRequestTemplate `json:"template,omitempty"` + Arguments [][]string `json:"arguments,omitempty"` + MaxAttempts uint32 `json:"max_attempts,omitempty"` } func (e ExecuteBatch) Response(c codes.Code, id string) *response.ExecuteBatch { return &response.ExecuteBatch{ BaseMessage: bls.BaseMessage{TraceInfo: e.TraceInfo}, RequestID: id, - Code: c, } } @@ -39,19 +38,19 @@ func (e ExecuteBatch) RollCall(id string) *RollCall { RequestID: id, FunctionID: e.Template.FunctionID, Attributes: e.Template.Config.Attributes, + Batch: true, } } -func (e ExecuteBatch) WorkOrderBatch(requestID string, strandID string, arguments ...[]string) *WorkOrderBatch { +func (e ExecuteBatch) WorkOrderBatch(requestID string, chunkID string, arguments ...[]string) *WorkOrderBatch { // TBD: Implement. 
w := &WorkOrderBatch{ - BaseMessage: bls.BaseMessage{TraceInfo: e.TraceInfo}, - RequestID: requestID, - StrandID: strandID, - Template: e.Template, - Arguments: arguments, - ConcurrencyLimit: e.WorkerConcurrencyLimit, + BaseMessage: bls.BaseMessage{TraceInfo: e.TraceInfo}, + RequestID: requestID, + ChunkID: chunkID, + Template: e.Template, + Arguments: arguments, } return w } diff --git a/models/request/roll_call.go b/models/request/roll_call.go index bea049b2..b216655b 100644 --- a/models/request/roll_call.go +++ b/models/request/roll_call.go @@ -19,14 +19,19 @@ type RollCall struct { RequestID string `json:"request_id,omitempty"` Consensus consensus.Type `json:"consensus"` Attributes *execute.Attributes `json:"attributes,omitempty"` + // This field is used to discriminate against workers that do not support batch executions. + // Workers that are aware of it should relay this flag back to us, while workers that are not + // will ignore/omit it. + Batch bool `json:"batch,omitempty"` } func (r RollCall) Response(c codes.Code) *response.RollCall { return &response.RollCall{ - BaseMessage: bls.BaseMessage{TraceInfo: r.TraceInfo}, - FunctionID: r.FunctionID, - RequestID: r.RequestID, - Code: c, + BaseMessage: bls.BaseMessage{TraceInfo: r.TraceInfo}, + FunctionID: r.FunctionID, + RequestID: r.RequestID, + Code: c, + BatchSupport: r.Batch, } } diff --git a/models/request/work_order_batch.go b/models/request/work_order_batch.go index 4d8df81b..809c2d6b 100644 --- a/models/request/work_order_batch.go +++ b/models/request/work_order_batch.go @@ -17,7 +17,7 @@ type WorkOrderBatch struct { // Technically workers don't need to know the request ID. // But for easier troubleshooting, at least for now, it's okay. 
RequestID string `json:"request_id,omitempty"` - StrandID string `json:"strand_id,omitempty"` + ChunkID string `json:"chunk_id,omitempty"` Arguments [][]string `json:"arguments,omitempty"` ConcurrencyLimit uint `json:"concurrency_limit,omitempty"` } @@ -53,8 +53,8 @@ func (w WorkOrderBatch) Valid() error { multierr = multierror.Append(multierr, errors.New("request ID is required")) } - if w.StrandID == "" { - multierr = multierror.Append(multierr, errors.New("strand ID is required")) + if w.ChunkID == "" { + multierr = multierror.Append(multierr, errors.New("chunk ID is required")) } if len(w.Arguments) == 0 { diff --git a/models/response/execute.go b/models/response/execute.go index 19c4fcd7..f7da736e 100644 --- a/models/response/execute.go +++ b/models/response/execute.go @@ -14,12 +14,12 @@ var _ (json.Marshaler) = (*Execute)(nil) type Execute struct { bls.BaseMessage RequestID string `json:"request_id,omitempty"` - Code codes.Code `json:"code,omitempty"` - Results execute.ResultMap `json:"results,omitempty"` - Cluster execute.Cluster `json:"cluster,omitempty"` + Code codes.Code `json:"code,omitzero"` + Results execute.ResultMap `json:"results,omitzero"` + Cluster execute.Cluster `json:"cluster,omitzero"` // Used to communicate the reason for failure to the user. - ErrorMessage string `json:"message,omitempty"` + ErrorMessage string `json:"message,omitzero"` } func (e *Execute) WithResults(r execute.ResultMap) *Execute { diff --git a/models/response/execute_batch.go b/models/response/execute_batch.go index ab592ab6..24fbf3a3 100644 --- a/models/response/execute_batch.go +++ b/models/response/execute_batch.go @@ -13,21 +13,21 @@ var _ (json.Marshaler) = (*ExecuteBatch)(nil) // Execute describes the response to the `MessageExecuteBatch` message. 
type ExecuteBatch struct { bls.BaseMessage - RequestID string `json:"request_id,omitempty"` - Code codes.Code `json:"code,omitempty"` - Strands map[string]NodeStrandResults `json:"strands,omitempty"` + RequestID string `json:"request_id,omitempty"` + Code codes.Code `json:"code,omitempty"` + Chunks map[string]NodeChunkResults `json:"chunks,omitempty"` // Used to communicate the reason for failure to the user. ErrorMessage string `json:"message,omitempty"` } -type NodeStrandResults struct { +type NodeChunkResults struct { Peer peer.ID `json:"peer,omitempty"` Results BatchResults `json:"results,omitempty"` } -func (e *ExecuteBatch) WithResults(strands map[string]NodeStrandResults) *ExecuteBatch { - e.Strands = strands +func (e *ExecuteBatch) WithResults(chunks map[string]NodeChunkResults) *ExecuteBatch { + e.Chunks = chunks return e } diff --git a/models/response/roll_call.go b/models/response/roll_call.go index e8cda49d..9ac09bf0 100644 --- a/models/response/roll_call.go +++ b/models/response/roll_call.go @@ -12,9 +12,10 @@ var _ (json.Marshaler) = (*RollCall)(nil) // RollCall describes the `MessageRollCall` response payload. type RollCall struct { bls.BaseMessage - Code codes.Code `json:"code,omitempty"` - FunctionID string `json:"function_id,omitempty"` - RequestID string `json:"request_id,omitempty"` + Code codes.Code `json:"code,omitempty"` + FunctionID string `json:"function_id,omitempty"` + RequestID string `json:"request_id,omitempty"` + BatchSupport bool `json:"batch_support,omitempty"` } func (RollCall) Type() string { return bls.MessageRollCallResponse } diff --git a/models/response/work_order_batch.go b/models/response/work_order_batch.go index ea39284f..c3c67992 100644 --- a/models/response/work_order_batch.go +++ b/models/response/work_order_batch.go @@ -14,9 +14,9 @@ type WorkOrderBatch struct { // It will help with debugging right now so let's leave it be. RequestID string - // NOTE: We have redundancy here as strand ID is :. 
+ // NOTE: We have redundancy here as chunk ID is :. // However, this might change too in the future. - StrandID string + ChunkID string Results BatchResults } diff --git a/mongo/mongo.go b/mongo/mongo.go new file mode 100644 index 00000000..fc2744cb --- /dev/null +++ b/mongo/mongo.go @@ -0,0 +1,38 @@ +package mongo + +import ( + "context" + "fmt" + "time" + + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + "go.mongodb.org/mongo-driver/v2/mongo/readpref" +) + +var ( + defaultCompressors = []string{"snappy", "zlib", "zstd"} + defaultPingTimeout = time.Second * 2 +) + +func Connect(ctx context.Context, serverURL string) (*mongo.Client, error) { + + opts := options.Client(). + SetCompressors(defaultCompressors). + ApplyURI(serverURL) + + client, err := mongo.Connect(opts) + if err != nil { + return nil, fmt.Errorf("connection error: %w", err) + } + + pingctx, cancel := context.WithTimeout(ctx, defaultPingTimeout) + defer cancel() + + err = client.Ping(pingctx, readpref.Primary()) + if err != nil { + return nil, fmt.Errorf("ping failed: %w", err) + } + + return client, nil +} diff --git a/node/core.go b/node/core.go index 79dad4dd..7ec478f6 100644 --- a/node/core.go +++ b/node/core.go @@ -3,7 +3,7 @@ package node import ( "context" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "github.com/rs/zerolog" diff --git a/node/head/batch_store.go b/node/head/batch_store.go new file mode 100644 index 00000000..2d53194c --- /dev/null +++ b/node/head/batch_store.go @@ -0,0 +1,219 @@ +package head + +import ( + "context" + "fmt" + "time" + + "github.com/hashicorp/go-multierror" + "github.com/libp2p/go-libp2p/core/peer" + + "github.com/blessnetwork/b7s/models/execute" + "github.com/blessnetwork/b7s/models/request" + "github.com/blessnetwork/b7s/models/response" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" +) + +// TODO: IDs are getting too big. 
+// TODO: RequestHash can be a byte sequence instead of a hash. + +// itemID is a request hash which is derived from what we are tasked to execute. However this does not need +// to be universally unique. We can have multiple execution requests that are executing the same bless function +// with the same arguments. Hence we need the batchID as well. +func workItemID(batchID string, itemID string) string { + return batchID + "/" + itemID +} + +func (h *HeadNode) saveBatch(batchID string, req request.ExecuteBatch) error { + + batch, items := requestToBatchRecord(batchID, req) + + batch.Status = batchstore.StatusInProgress + + err := h.cfg.BatchStore.CreateBatch(context.TODO(), batch) + if err != nil { + return fmt.Errorf("could not persist batch: %w", err) + } + + err = h.cfg.BatchStore.CreateWorkItems(context.TODO(), items...) + if err != nil { + return fmt.Errorf("could not persist work items: %w", err) + } + + return nil +} + +func (h *HeadNode) saveChunkInfo(batchID string, assignments map[peer.ID]*request.WorkOrderBatch) error { + + err := h.createChunks(batchID, assignments) + if err != nil { + return fmt.Errorf("could not save chunks: %w", err) + } + + // Update work items to set their assignments (associate chunk ID). + err = h.updateWorkOrderAssignments(batchID, assignments) + if err != nil { + return fmt.Errorf("could not update work items to assign chunk ID: %w", err) + } + + return nil +} + +func (h *HeadNode) createChunks(batchID string, assignments map[peer.ID]*request.WorkOrderBatch) error { + + ts := time.Now().UTC() + chunks := make([]*batchstore.ChunkRecord, len(assignments)) + + i := 0 + for peer, chunk := range assignments { + chunks[i] = &batchstore.ChunkRecord{ + ID: chunk.ChunkID, + BatchID: batchID, + Worker: peer.String(), + Status: batchstore.StatusCreated, + CreatedAt: ts, + } + + i++ + } + + return h.cfg.BatchStore.CreateChunks(context.TODO(), chunks...) 
+} + +func (h *HeadNode) updateWorkOrderAssignments(batchID string, assignments map[peer.ID]*request.WorkOrderBatch) error { + + // TODO: Count how many times we calculate execution ID - this is all hashing and could be costly. + + for peer, chunk := range assignments { + + ids := make([]string, len(chunk.Arguments)) + for i, args := range chunk.Arguments { + ids[i] = workItemID(batchID, string(execute.ExecutionID(chunk.Template.FunctionID, chunk.Template.Method, args))) + } + + // NOTE: Potentially inefficient - one query per chunk. + err := h.cfg.BatchStore.AssignWorkItems(context.TODO(), chunk.ChunkID, ids...) + if err != nil { + return fmt.Errorf("could not update work item assignment (chunk: %v, worker: %v): %w", chunk.ChunkID, peer.String(), err) + } + } + + return nil +} + +func (h *HeadNode) markStartedChunks(batchID string, assignments map[peer.ID]*request.WorkOrderBatch, ignore []peer.ID) error { + + // Faster lookup of chunks to not update. + im := make(map[peer.ID]struct{}) + for _, peer := range ignore { + im[peer] = struct{}{} + } + + // Get list of started chunks so we can update them. + started := make([]string, 0, len(assignments)) + for peer, chunk := range assignments { + _, undelivered := im[peer] + if undelivered { + continue + } + + started = append(started, chunk.ChunkID) + } + + var multierr *multierror.Error + + // Update chunks all at once. + err := h.cfg.BatchStore.UpdateChunkStatus(context.TODO(), batchstore.StatusInProgress, started...) + if err != nil { + multierr = multierror.Append(multierr, err) + } + + // Update work items in larger batches. 
+ for peer, chunk := range assignments { + + _, undelivered := im[peer] + if undelivered { + continue + } + + ids := make([]string, len(chunk.Arguments)) + for i, args := range chunk.Arguments { + ids[i] = workItemID(batchID, string(execute.ExecutionID(chunk.Template.FunctionID, chunk.Template.Method, args))) + } + + err := h.cfg.BatchStore.UpdateWorkItemStatus(context.TODO(), batchstore.StatusInProgress, ids...) + if err != nil { + multierr = multierror.Append(multierr, err) + } + } + + return multierr.ErrorOrNil() +} + +func (h *HeadNode) markCompletedChunks(batchID string, sizes map[string]int, chunkResults map[string]response.NodeChunkResults) error { + + // Group resulting work items by status so we can update them in batches. + completed := make([]string, 0, len(chunkResults)) + //statuses := make(map[batchstore.Status][]string) + statuses := make(map[string]batchstore.WorkItemStatus) + for chunkID, res := range chunkResults { + + for itemID, itemResult := range res.Results { + + status := exitCodeToBatchStoreStatus(itemResult.Result.Result.ExitCode) + + h.Log().Debug(). + Str("batch", batchID). + Str("chunk", chunkID). + Str("item_id", string(itemID)). + Int32("status", int32(status)). + Int("exit_code", itemResult.Result.Result.ExitCode). + Msg("processing chunk work item") + + statuses[workItemID(batchID, string(itemID))] = batchstore.WorkItemStatus{ + Status: status, + Output: itemResult.Result.Result.Stdout, + } + } + + // If we have all of the results - mark the chunk as done. + if len(res.Results) == sizes[chunkID] { + completed = append(completed, chunkID) + } + } + + var merr *multierror.Error + + h.Log().Info(). + Int("count", len(statuses)). 
+ Msg("updating work item status in batch store") + + err := h.cfg.BatchStore.UpdateWorkItemsOutput(context.TODO(), statuses) + if err != nil { + // Logging AND returning the message here but extra context is useful + h.Log().Error().Err(err).Msg("could not update work item status") + + merr = multierror.Append(merr, fmt.Errorf("could not update work item status: %w", err)) + } + + err = h.cfg.BatchStore.UpdateChunkStatus(context.TODO(), batchstore.StatusDone, completed...) + if err != nil { + h.Log().Error(). + Err(err). + Strs("ids", completed). + Msg("could not update chunk statuses") + + merr = multierror.Append(merr, fmt.Errorf("could not update chunk status: %w", err)) + } + + return merr.ErrorOrNil() +} + +func exitCodeToBatchStoreStatus(e int) batchstore.Status { + switch e { + case 0: + return batchstore.StatusDone + default: + return batchstore.StatusFailed + } +} diff --git a/node/head/batch_types.go b/node/head/batch_types.go new file mode 100644 index 00000000..0530e660 --- /dev/null +++ b/node/head/batch_types.go @@ -0,0 +1,63 @@ +package head + +import ( + "time" + + "github.com/blessnetwork/b7s/models/execute" + "github.com/blessnetwork/b7s/models/request" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" +) + +// Convert a batchstore record format to request format. +func batchRecordToRequest(batch *batchstore.ExecuteBatchRecord, items []*batchstore.WorkItemRecord) request.ExecuteBatch { + + args := make([][]string, len(items)) + for i, item := range items { + args[i] = item.Arguments + } + + req := request.ExecuteBatch{ + Topic: "", // TODO: Add topic support + Template: request.ExecutionRequestTemplate{ + FunctionID: batch.CID, + Method: batch.Method, + Config: execute.Config(batch.Config), + }, + Arguments: args, + MaxAttempts: batch.MaxAttempts, + } + + return req +} + +// Convert request format to batchstore record format.
+func requestToBatchRecord(id string, req request.ExecuteBatch) (*batchstore.ExecuteBatchRecord, []*batchstore.WorkItemRecord) { + + batch := &batchstore.ExecuteBatchRecord{ + ID: id, + CID: req.Template.FunctionID, + Method: req.Template.Method, + Config: req.Template.Config, + Status: batchstore.StatusCreated, + CreatedAt: time.Now().UTC(), + } + + items := make([]*batchstore.WorkItemRecord, len(req.Arguments)) + for i, args := range req.Arguments { + + itemID := execute.ExecutionID( + req.Template.FunctionID, + req.Template.Method, + args) + + items[i] = &batchstore.WorkItemRecord{ + ID: workItemID(id, string(itemID)), + BatchID: id, + Arguments: args, + Status: batchstore.StatusCreated, + Attempts: 0, + } + } + + return batch, items +} diff --git a/node/head/config.go b/node/head/config.go index e150bac5..ef1525df 100644 --- a/node/head/config.go +++ b/node/head/config.go @@ -4,6 +4,8 @@ import ( "time" "github.com/blessnetwork/b7s/consensus" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "github.com/blessnetwork/b7s/stores/batch-store/ibs" ) // Option can be used to set Node configuration options. @@ -15,14 +17,33 @@ var DefaultConfig = Config{ ExecutionTimeout: DefaultExecutionTimeout, ClusterFormationTimeout: DefaultClusterFormationTimeout, DefaultConsensus: DefaultConsensusAlgorithm, + WorkItemMaxAttempts: DefaultBatchWorkItemMaxAttempts, + RequeueInterval: DefaultBatchRequeueInterval, + BatchStore: ibs.NewBatchStore(), } // Config represents the Node configuration. type Config struct { - RollCallTimeout time.Duration // How long do we wait for roll call responses. - ExecutionTimeout time.Duration // How long does the head node wait for worker nodes to send their execution results. - ClusterFormationTimeout time.Duration // How long do we wait for the nodes to form a cluster for an execution. - DefaultConsensus consensus.Type // Default consensus algorithm to use. 
+ RollCallTimeout time.Duration // How long do we wait for roll call responses. + ExecutionTimeout time.Duration // How long does the head node wait for worker nodes to send their execution results. + ClusterFormationTimeout time.Duration // How long do we wait for the nodes to form a cluster for an execution. + DefaultConsensus consensus.Type // Default consensus algorithm to use. + BatchStore batchstore.Store // Batch store for persisting batch requests + WorkItemMaxAttempts uint32 // How many times shoud node retry executing a work item before it gives up. + RequeueInterval time.Duration // How often should head node check on batch status and requeue failed items. +} + +// BatchStore sets the batch store to be used by the head node. +func BatchStore(b batchstore.Store) Option { + return func(cfg *Config) { + cfg.BatchStore = b + } +} + +func BatchRequeueInterval(d time.Duration) Option { + return func(cfg *Config) { + cfg.RequeueInterval = d + } } func (c Config) Valid() error { diff --git a/node/head/execute.go b/node/head/execute.go index 57b19f98..34d9852d 100644 --- a/node/head/execute.go +++ b/node/head/execute.go @@ -7,7 +7,7 @@ import ( "fmt" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "go.opentelemetry.io/otel/trace" @@ -218,21 +218,7 @@ func (h *HeadNode) processWorkOrderResponse(ctx context.Context, from peer.ID, r return nil } -func (h *HeadNode) processWorkOrderBatchResponse(ctx context.Context, from peer.ID, res response.WorkOrderBatch) error { - - h.Log().Debug(). - Stringer("from", from). - Str("request", res.RequestID). - Str("strand", res.StrandID). 
- Msg("received work order batch response") - - key := peerStrandKey(res.RequestID, res.StrandID, from) - h.workOrderBatchResponses.Set(key, res) - - return nil -} - -func peerStrandKey(requestID string, _ string, peer peer.ID) string { +func peerChunkKey(requestID string, _ string, peer peer.ID) string { return requestID + "/" + peer.String() } diff --git a/node/head/execute_batch.go b/node/head/execute_batch.go index 17b0297f..0b86370d 100644 --- a/node/head/execute_batch.go +++ b/node/head/execute_batch.go @@ -5,13 +5,15 @@ import ( "errors" "fmt" + "github.com/hashicorp/go-multierror" "github.com/libp2p/go-libp2p/core/peer" - "github.com/rs/zerolog" "github.com/blessnetwork/b7s/models/bls" "github.com/blessnetwork/b7s/models/codes" + "github.com/blessnetwork/b7s/models/execute" "github.com/blessnetwork/b7s/models/request" "github.com/blessnetwork/b7s/models/response" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" ) type ExecutionBatchAssignments map[peer.ID]*request.WorkOrderBatch @@ -32,17 +34,29 @@ func (h *HeadNode) processExecuteBatch(ctx context.Context, from peer.ID, req re Str("function", req.Template.FunctionID). Int("size", len(req.Arguments)).Logger() - log.Info().Msg("received a batch request") + log.Info().Msg("received batch execution request") - results, err := h.executeBatch(ctx, requestID, req) + // Persist batch and work items. + err := h.saveBatch(requestID, req) + if err != nil { + return fmt.Errorf("could not save batch request: %w", err) + } + + // TODO: Reset "in progress" work items: Head node could send a chunk to a worker, + // and the worker might crash or something. The head node will then consider that chunk as "in progress" + // as it was delivered to the worker, while the worker will (after restart) lose any info about the chunk + // it was sent. When looking into resuming a batch - we must consider these work items - + // - at certain point they should be reset and no longer be considered "in progress". 
+ + // TODO: When a work order batch response is received out of band, status should be updated too. + err = h.startBatchExecution(ctx, requestID, req) if err != nil { return fmt.Errorf("could not execute batch request: %w", err) } - log.Info().Any("results", results).Msg("received batch responses") + log.Info().Msg("started batch execution") - // TODO: Add actual status code. - res := req.Response(codes.OK, requestID).WithResults(results) + res := req.Response(codes.OK, requestID) err = h.Send(ctx, from, res) if err != nil { @@ -52,18 +66,13 @@ func (h *HeadNode) processExecuteBatch(ctx context.Context, from peer.ID, req re return nil } -type batchResults map[string]response.NodeStrandResults - -func (h *HeadNode) executeBatch( +func (h *HeadNode) startBatchExecution( ctx context.Context, requestID string, req request.ExecuteBatch, -) ( - batchResults, - error, -) { +) error { - // TODO: Metrics and tracing + // TODO: Metrics log := h.Log().With(). Str("request", requestID). @@ -73,8 +82,7 @@ func (h *HeadNode) executeBatch( log.Info().Msg("processing batch execution request") // Phase 1. - Issue roll call to nodes. - - rc := rollCallRequest(req.Template.FunctionID, requestID, 0, req.Template.Config.Attributes) + rc := req.RollCall(requestID) rctx, cancel := context.WithTimeout(ctx, h.cfg.ExecutionTimeout) defer cancel() @@ -82,7 +90,7 @@ func (h *HeadNode) executeBatch( // node count is -1 - we want all the nodes that want to work. peers, err := h.executeRollCall(rctx, rc, req.Topic, req.Template.Config.NodeCount) if err != nil { - return nil, fmt.Errorf("could not execute roll call: %w", err) + return fmt.Errorf("could not execute roll call: %w", err) } log.Debug(). @@ -91,91 +99,240 @@ func (h *HeadNode) executeBatch( assignments := partitionWorkBatch(peers, requestID, req) - // TODO: Rethink, useful but ugly. - logAssignments(&log, assignments) + // XXX: + // 1. create chunks in the DB + // 2. 
update work items to contain chunk information to which they are assigned to. + err = h.saveChunkInfo(requestID, assignments) + if err != nil { + return fmt.Errorf("could not save chunks: %w", err) + } + // Useful but ugly, won't use it in normal operation unless it proves to be required. + // logAssignments(&log, assignments) + + var failedDeliveries []peer.ID err = h.sendBatch(ctx, assignments) if err != nil { var sendErr *batchSendError - if errors.As(err, &sendErr) { - // TODO: Handle partial failures by retrying part of the batch that failed. - log.Warn(). - Strs("peers", bls.PeerIDsToStr(sendErr.Targets())). - Msg("partial failure to send batch requst") + if !errors.As(err, &sendErr) { + return fmt.Errorf("could not send work order batch: %w", err) } - return nil, fmt.Errorf("could not send work order batch: %w", err) + log.Warn(). + Strs("peers", bls.PeerIDsToStr(sendErr.Targets())). + Msg("partial failure to send batch requst") + + failedDeliveries = sendErr.Targets() } - // TODO: Handle errors - reintroduce to the pool. + err = h.markStartedChunks(requestID, assignments, failedDeliveries) + if err != nil { + return fmt.Errorf("could not mark chunks as in-progress: %w", err) + } - // Wait for results. + return nil +} - assignedWorkers := mapKeys(assignments) +// // generic helpers to get keys from a map. No locking or anything. +// func mapKeys[K comparable, V any](m map[K]V) []K { - waitctx, cancel := context.WithTimeout(ctx, h.cfg.ExecutionTimeout) - defer cancel() +// keys := make([]K, 0, len(m)) +// for key := range m { +// keys = append(keys, key) +// } + +// return keys +// } +// +// func logAssignments(log *zerolog.Logger, assignments map[peer.ID]*request.WorkOrderBatch) { +// +// log.Debug(). +// Stringer("peer", peer). +// Int("count", len(assignment.Arguments)). +// Msg("work batch prepared for a peer") +// +// for i, args := range assignment.Arguments { +// log.Debug(). +// Stringer("peer", peer). +// Int("i", i). +// Strs("arguments", args). 
+// Msg("work order variant") +// } +// } +// } - keyfunc := func(id peer.ID) string { - return peerStrandKey(requestID, assignments[id].StrandID, id) +// Collect any work items for the batch that have not been executed yet and start their execution again. +func (h *HeadNode) continueBatchExecution(ctx context.Context, batch *batchstore.ExecuteBatchRecord) error { + + requestID := batch.ID + + log := h.Log().With().Str("batch", batch.ID).Logger() + + log.Info().Msg("continuing batch execution") + + if batch.Status == batchstore.StatusDone { + log.Info().Msg("batch reported as completed, stopping") + return nil } - batchResults := gatherPeerMessages( - waitctx, - assignedWorkers, - keyfunc, - h.workOrderBatchResponses, - ) + // We want to restart execution of failed items, or those that were created but not started + items, err := h.cfg.BatchStore.FindWorkItems(ctx, requestID, "", batchstore.StatusCreated, batchstore.StatusFailed) + if err != nil { + return fmt.Errorf("could not retrieve work items for batch (batch:%v): %w", requestID, err) + } - strandResults := make(map[string]response.NodeStrandResults) - for peer, res := range batchResults { + threshold := min(h.cfg.WorkItemMaxAttempts, batch.MaxAttempts) + pending, permaFailed := filterWorkItems(items, threshold) - sr := response.NodeStrandResults{ - Peer: peer, - Results: res.Results, - } + if len(permaFailed) > 0 { + go func(ctx context.Context) { - assignment, ok := assignments[peer] - // Should never happen. - if !ok { - return nil, fmt.Errorf("found a batch result for a peer without assignment (request: %v, peer: %v, reported strand id: %v)", - requestID, - peer.String(), - res.StrandID) - } + h.Log().Info().Str("batch", requestID).Int("count", len(permaFailed)). 
+ Msg("marking work items as permanently failed") + + formatWorkRecordIDs := func(items []*batchstore.WorkItemRecord) []string { + ids := make([]string, len(items)) + for i, item := range items { + ids[i] = workItemID(requestID, string(execute.ExecutionID(batch.CID, batch.Method, item.Arguments))) + } - strandResults[assignment.StrandID] = sr + return ids + } + + err := h.cfg.BatchStore.UpdateWorkItemStatus(ctx, batchstore.StatusPermanentlyFailed, formatWorkRecordIDs(permaFailed)...) + if err != nil { + log.Error().Err(err).Msg("could not mark items as permanently failed") + } + }(ctx) + } + + if len(pending) == 0 { + + h.Log().Info().Str("batch", requestID). + Msg("no pending work items - marking batch as done") + + return h.cfg.BatchStore.UpdateBatchStatus(ctx, batchstore.StatusDone, requestID) + } + + h.Log().Info().Str("batch", requestID).Int("pending", len(pending)). + Msg("requeuing batch work items") + + // TODO: We should no longer use the original number of nodes - we might only be processing 2% of work items, no reason to request the original N number of workers. + err = h.startBatchExecution(ctx, requestID, batchRecordToRequest(batch, pending)) + if err != nil { + return fmt.Errorf("could not continue batch execution: %w", err) + } - return strandResults, nil + return nil } -// generic helpers to get keys from a map. No locking or anything.
-func mapKeys[K comparable, V any](m map[K]V) []K { +// Split work item list into two categories: +// pending - created or failed ones +// perma failed - items that failed execution N times +func filterWorkItems(items []*batchstore.WorkItemRecord, threshold uint32) ([]*batchstore.WorkItemRecord, []*batchstore.WorkItemRecord) { + + var ( + pending = make([]*batchstore.WorkItemRecord, 0, len(items)) + permaFailed []*batchstore.WorkItemRecord + ) + + for _, item := range items { + + switch item.Status { + case batchstore.StatusCreated: + pending = append(pending, item) - keys := make([]K, 0, len(m)) - for key := range m { - keys = append(keys, key) + case batchstore.StatusFailed: + + if item.Attempts >= uint32(threshold) { + permaFailed = append(permaFailed, item) + continue + } + + pending = append(pending, item) + } } - return keys + return pending, permaFailed } -func logAssignments(log *zerolog.Logger, assignments map[peer.ID]*request.WorkOrderBatch) { +func (h *HeadNode) processWorkOrderBatchResponse(ctx context.Context, from peer.ID, res response.WorkOrderBatch) error { + + log := h.Log().With(). + Stringer("from", from). + Str("batch", res.RequestID). + Str("chunk", res.ChunkID). + Logger() + + log.Debug().Msg("received work order batch response") + + // Perhaps on batch resume, node should first check the batch response cache and update the status for those work items. + + chunk, err := h.cfg.BatchStore.GetChunk(ctx, res.ChunkID) + if err != nil { + return fmt.Errorf("no matching chunk found (batch: %v, chunk: %v, peer: %v): %w", res.RequestID, res.ChunkID, from.String(), err) + } + + if chunk.Worker != from.String() { + return fmt.Errorf("unexpected worker returned result (chunk: %v, expected: %v, got: %v)", res.ChunkID, chunk.Worker, from.String()) + } + + // We'll convert this into a map as it's a more usable format for what we need.
+ statuses := make(map[string]batchstore.WorkItemStatus) + for itemID, itemResult := range res.Results { + + status := exitCodeToBatchStoreStatus(itemResult.Result.Result.ExitCode) - for peer, assignment := range assignments { log.Debug(). - Stringer("peer", peer). - Int("count", len(assignment.Arguments)). - Msg("work batch prepared for a peer") - - for i, args := range assignment.Arguments { - log.Debug(). - Stringer("peer", peer). - Int("i", i). - Strs("arguments", args). - Msg("work order variant") + Str("item_id", string(itemID)). + Int32("status", int32(status)). + Int("exit_code", itemResult.Result.Result.ExitCode). + Msg("processing chunk work item") + + statuses[workItemID(chunk.BatchID, string(itemID))] = batchstore.WorkItemStatus{ + Status: status, + Output: itemResult.Result.Result.Stdout, + } + } + + // Now that we have a map - we can use it for lookup to make sure all items we have are eligible to be updated. + // For example - all work items this node returned actually do belong to this chunk. + items, err := h.cfg.BatchStore.FindWorkItems(ctx, "", res.ChunkID, batchstore.StatusInProgress) + if err != nil { + return fmt.Errorf("could not retrieve chunk work items (chunk: %v): %w", res.ChunkID, err) + } + + for _, item := range items { + if res.ChunkID != item.ChunkID { + return fmt.Errorf("item received from worker belongs to a different chunk (item: %v received_chunk: %v, actual: %v)", + item.ID, res.ChunkID, item.ChunkID) + } + } + + h.Log().Info(). + Int("count", len(statuses)). 
+ Msg("updating work item status in batch store") + + var merr *multierror.Error + + err = h.cfg.BatchStore.UpdateWorkItemsOutput(ctx, statuses) + if err != nil { + // Logging AND returning the message here but extra context is useful + log.Error().Err(err).Msg("could not update work item status") + + merr = multierror.Append(merr, fmt.Errorf("could not update work item status: %w", err)) + } + + if len(items) == len(statuses) { + err = h.cfg.BatchStore.UpdateChunkStatus(ctx, batchstore.StatusDone, res.ChunkID) + if err != nil { + // Logging AND returning the message here but extra context is useful + log.Error().Err(err).Msg("could not update chunk status") + + merr = multierror.Append(merr, fmt.Errorf("could not update chunk status: %w", err)) } } + + return merr.ErrorOrNil() } diff --git a/node/head/execution_results.go b/node/head/execution_results.go index 8aea17f5..2d29696f 100644 --- a/node/head/execution_results.go +++ b/node/head/execution_results.go @@ -9,7 +9,6 @@ import ( "github.com/blessnetwork/b7s/consensus/pbft" "github.com/blessnetwork/b7s/models/execute" - "github.com/blessnetwork/b7s/models/response" "github.com/blessnetwork/b7s/node/internal/waitmap" ) @@ -157,8 +156,6 @@ func gatherPeerMessages[T any]( wm *waitmap.WaitMap[string, T], ) map[peer.ID]T { - // TODO: Provide a limited context now. - var ( results = make(map[peer.ID]T) reslock sync.Mutex @@ -189,42 +186,3 @@ func gatherPeerMessages[T any]( return results } - -func (h *HeadNode) gatherBatchResults(ctx context.Context, requestID string, strandID string, peers []peer.ID) map[peer.ID]response.WorkOrderBatch { - - // We're willing to wait for a limited amount of time. - exctx, exCancel := context.WithTimeout(ctx, h.cfg.ExecutionTimeout) - defer exCancel() - - var ( - results = make(map[peer.ID]response.WorkOrderBatch) - reslock sync.Mutex - wg sync.WaitGroup - ) - - wg.Add(len(peers)) - - // Wait on peers asynchronously. 
- for _, rp := range peers { - rp := rp - - go func(peer peer.ID) { - defer wg.Done() - key := peerStrandKey(requestID, strandID, peer) - res, ok := h.workOrderBatchResponses.WaitFor(exctx, key) - if !ok { - return - } - - h.Log().Info().Str("peer", peer.String()).Msg("accounted execution response from peer") - - reslock.Lock() - defer reslock.Unlock() - results[peer] = res - }(rp) - } - - wg.Wait() - - return results -} diff --git a/node/head/head.go b/node/head/head.go index 046f154a..551d7154 100644 --- a/node/head/head.go +++ b/node/head/head.go @@ -3,15 +3,17 @@ package head import ( "context" "fmt" + "time" - "github.com/armon/go-metrics" "github.com/google/uuid" + "github.com/hashicorp/go-metrics" "github.com/blessnetwork/b7s/info" "github.com/blessnetwork/b7s/models/execute" "github.com/blessnetwork/b7s/models/response" "github.com/blessnetwork/b7s/node" "github.com/blessnetwork/b7s/node/internal/waitmap" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" ) type HeadNode struct { @@ -19,15 +21,14 @@ type HeadNode struct { cfg Config - rollCall *rollCallQueue - consensusResponses *waitmap.WaitMap[string, response.FormCluster] - workOrderResponses *waitmap.WaitMap[string, execute.NodeResult] - workOrderBatchResponses *waitmap.WaitMap[string, response.WorkOrderBatch] + rollCall *rollCallQueue + consensusResponses *waitmap.WaitMap[string, response.FormCluster] + workOrderResponses *waitmap.WaitMap[string, execute.NodeResult] } func New(core node.Core, options ...Option) (*HeadNode, error) { - // InitiaStrandResultsize config. + // InitiaChunkResultsize config. 
cfg := DefaultConfig for _, option := range options { option(&cfg) @@ -42,10 +43,9 @@ func New(core node.Core, options ...Option) (*HeadNode, error) { Core: core, cfg: cfg, - rollCall: newQueue(rollCallQueueBufferSize), - consensusResponses: waitmap.New[string, response.FormCluster](0), - workOrderResponses: waitmap.New[string, execute.NodeResult](executionResultCacheSize), - workOrderBatchResponses: waitmap.New[string, response.WorkOrderBatch](executionResultCacheSize), + rollCall: newQueue(rollCallQueueBufferSize), + consensusResponses: waitmap.New[string, response.FormCluster](0), + workOrderResponses: waitmap.New[string, execute.NodeResult](executionResultCacheSize), } head.Metrics().SetGaugeWithLabels(node.NodeInfoMetric, 1, @@ -59,9 +59,72 @@ func New(core node.Core, options ...Option) (*HeadNode, error) { } func (h *HeadNode) Run(ctx context.Context) error { + + go func(ctx context.Context) { + + // Not a perfect solution, but the simplest one - wait a little while + // until some of the peers connect. + time.Sleep(batchResumeDelay) + + // Run first sync immediately. + err := h.resumeUnfinishedBatches(ctx) + if err != nil { + h.Log().Error().Err(err). + Msg("could not resume incomplete batches") + } + + ticker := time.NewTicker(h.cfg.RequeueInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + + err := h.resumeUnfinishedBatches(ctx) + if err != nil { + h.Log().Error().Err(err). + Msg("could not resume incomplete batches") + } + + case <-ctx.Done(): + h.Log().Info().Msg("stopping batch resume loop") + return + } + } + }(ctx) + return h.Core.Run(ctx, h.process) } +func (h *HeadNode) resumeUnfinishedBatches(ctx context.Context) error { + + batches, err := h.cfg.BatchStore.FindBatches(ctx, batchstore.StatusInProgress, batchstore.StatusCreated) + if err != nil { + return fmt.Errorf("could not lookup incomplete batches: %w", err) + } + + if len(batches) == 0 { + return nil + } + + h.Log().Info().Int("count", len(batches)).
+ Msg("found unfinished batches") + + // NOTE: Batches are processed sequentially. Potentially this could be done in parallel. + for _, batch := range batches { + + err = h.continueBatchExecution(ctx, batch) + if err != nil { + h.Log().Error().Str("batch", batch.ID).Err(err). + Msg("countinued batch execution failed") + + continue + } + } + + return nil +} + func newRequestID() string { return newUUID() } diff --git a/node/head/params.go b/node/head/params.go index ba0ad96e..e2cf4484 100644 --- a/node/head/params.go +++ b/node/head/params.go @@ -7,13 +7,16 @@ import ( ) const ( - DefaultRollCallTimeout = 5 * time.Second - DefaultExecutionTimeout = 20 * time.Second - DefaultClusterFormationTimeout = 10 * time.Second - DefaultConsensusAlgorithm = consensus.Raft + DefaultRollCallTimeout = 5 * time.Second + DefaultExecutionTimeout = 20 * time.Second + DefaultClusterFormationTimeout = 10 * time.Second + DefaultConsensusAlgorithm = consensus.Raft + DefaultBatchWorkItemMaxAttempts = 10 + DefaultBatchRequeueInterval = time.Hour rollCallQueueBufferSize = 1000 executionResultCacheSize = 1000 + batchResumeDelay = 1 * time.Minute defaultExecutionThreshold = 0.6 diff --git a/node/head/rest.go b/node/head/rest.go index 6566c6ad..f970a034 100644 --- a/node/head/rest.go +++ b/node/head/rest.go @@ -5,11 +5,14 @@ import ( "crypto/sha256" "fmt" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/blessnetwork/b7s/models/bls" "github.com/blessnetwork/b7s/models/codes" "github.com/blessnetwork/b7s/models/execute" "github.com/blessnetwork/b7s/models/request" "github.com/blessnetwork/b7s/models/response" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" ) // ExecuteFunction can be used to start function execution. At the moment this is used by the API server to start execution on the head node. 
@@ -25,7 +28,7 @@ func (h *HeadNode) ExecuteFunction(ctx context.Context, req execute.Request, sub return code, requestID, results, cluster, nil } -func (h *HeadNode) ExecuteFunctionBatch(ctx context.Context, req request.ExecuteBatch) (*response.ExecuteBatch, error) { +func (h *HeadNode) StartFunctionBatchExecution(ctx context.Context, req request.ExecuteBatch) (string, error) { requestID := newRequestID() @@ -36,17 +39,20 @@ func (h *HeadNode) ExecuteFunctionBatch(ctx context.Context, req request.Execute log.Info().Msg("processing batch execution request via API") - results, err := h.executeBatch(ctx, requestID, req) + // Persist batch and work items. + err := h.saveBatch(requestID, req) if err != nil { - return nil, fmt.Errorf("could not execute batch request: %w", err) + return "", fmt.Errorf("could not save batch request: %w", err) } - log.Info().Any("results", results).Msg("received batch responses") - - // TODO: Add actual status code. - res := req.Response(codes.OK, requestID).WithResults(results) + go func() { + err := h.startBatchExecution(context.Background(), requestID, req) + if err != nil { + h.Log().Error().Err(err).Str("batch", requestID).Msg("could not execute batch") + } + }() - return res, nil + return requestID, nil } // ExecutionResult fetches the execution result from the node cache. @@ -83,6 +89,75 @@ func (h *HeadNode) PublishFunctionInstall(ctx context.Context, uri string, cid s return nil } +func (h *HeadNode) GetBatchResults(ctx context.Context, id string) (*response.ExecuteBatch, error) { + + batch, err := h.cfg.BatchStore.GetBatch(ctx, id) + if err != nil { + return nil, fmt.Errorf("could not retrieve batch result: %w", err) + } + + // We will need to group work items according to the group they belong to. + chunks, err := h.cfg.BatchStore.FindChunks(ctx, batch.ID) + if err != nil { + return nil, fmt.Errorf("could not retrieve chunks for batch: %w", err) + } + + // Find all work items belonging to this batch. 
+ items, err := h.cfg.BatchStore.FindWorkItems(ctx, batch.ID, "") + if err != nil { + return nil, fmt.Errorf("could not retrieve work items for batch: %w", err) + } + + lookup := make(map[string]*batchstore.ChunkRecord) + for _, chunk := range chunks { + lookup[chunk.ID] = chunk + } + + oc := make(map[string]response.NodeChunkResults) + for _, item := range items { + + chunk, ok := lookup[item.ChunkID] + if !ok { + h.Log().Warn().Str("batch", batch.ID).Str("work_item", item.ID).Str("chunk", item.ChunkID). + Msg("chunk not found for work item") + continue + } + + _, ok = oc[item.ChunkID] + if !ok { + + id, err := peer.Decode(chunk.Worker) + if err != nil { + return nil, fmt.Errorf("invalid peer ID found (id: %s): %w", chunk.Worker, err) + } + + oc[item.ChunkID] = response.NodeChunkResults{ + Peer: id, + Results: make(map[execute.RequestHash]*response.BatchFunctionResult), + } + } + + hash := execute.ExecutionID(batch.CID, batch.Method, item.Arguments) + oc[item.ChunkID].Results[hash] = &response.BatchFunctionResult{ + NodeResult: execute.NodeResult{ + Result: execute.Result{Result: execute.RuntimeOutput{ + Stdout: item.Output, + }}, + }, + FunctionInvocation: execute.FunctionInvocation(batch.CID, batch.Method), + Arguments: item.Arguments, + } + } + + out := &response.ExecuteBatch{ + RequestID: id, + Code: codes.OK, // TODO: Be more precise in this, not all executions are "OK". + Chunks: oc, + } + + return out, nil +} + // createInstallMessageFromURI creates a MsgInstallFunction from the given URI. // CID is calculated as a SHA-256 hash of the URI. 
func createInstallMessageFromURI(uri string) (request.InstallFunction, error) { diff --git a/node/head/roll_call.go b/node/head/roll_call.go index dfd0b520..1cbac698 100644 --- a/node/head/roll_call.go +++ b/node/head/roll_call.go @@ -5,15 +5,13 @@ import ( "context" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" - "github.com/blessnetwork/b7s/consensus" cons "github.com/blessnetwork/b7s/consensus" "github.com/blessnetwork/b7s/consensus/pbft" "github.com/blessnetwork/b7s/models/bls" "github.com/blessnetwork/b7s/models/codes" - "github.com/blessnetwork/b7s/models/execute" "github.com/blessnetwork/b7s/models/request" "github.com/blessnetwork/b7s/models/response" ) @@ -69,12 +67,14 @@ rollCallResponseLoop: case reply := <-h.rollCall.responses(requestID): - // Check if this is the reply we want - shouldn't really happen. - if reply.FunctionID != rc.FunctionID { + // Check if this is the reply we want - should be the same function and node should report if they + // support batch executions. + if reply.FunctionID != rc.FunctionID || reply.BatchSupport != rc.Batch { log.Info(). Stringer("peer", reply.From). Str("function_got", reply.FunctionID). - Msg("skipping inadequate roll call response - wrong function") + Bool("batch_support", reply.BatchSupport). + Msg("skipping inadequate roll call response") continue } @@ -93,7 +93,9 @@ rollCallResponseLoop: // -1 means we'll take any peers reporting if len(reportingPeers) >= nodeCount && nodeCount != -1 { - log.Info().Msg("enough peers reported for roll call") + log.Info(). + Int("count", len(reportingPeers)). + Msg("enough peers reported for roll call") break rollCallResponseLoop } } @@ -158,14 +160,3 @@ func (h *HeadNode) processRollCallResponse(ctx context.Context, from peer.ID, re return nil } - -// TODO: RollCall must have trace info propagated. 
-func rollCallRequest(function string, id string, c consensus.Type, attributes *execute.Attributes) *request.RollCall { - return &request.RollCall{ - // BaseMessage: bls.BaseMessage{TraceInfo: req.TraceInfo}, - RequestID: id, - FunctionID: function, - Consensus: c, - Attributes: attributes, - } -} diff --git a/node/head/telemetry_params.go b/node/head/telemetry_params.go index ac4947fa..63c57caa 100644 --- a/node/head/telemetry_params.go +++ b/node/head/telemetry_params.go @@ -1,12 +1,13 @@ package head import ( - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) // Tracing span names. const ( - spanExecute = "Execute" + spanExecute = "Execute" + spanExecuteBatch = "ExecuteBatch" ) var ( diff --git a/node/head/work_partition.go b/node/head/work_partition.go index 4a99bb21..05f7a083 100644 --- a/node/head/work_partition.go +++ b/node/head/work_partition.go @@ -1,8 +1,6 @@ package head import ( - "fmt" - "github.com/libp2p/go-libp2p/core/peer" "github.com/blessnetwork/b7s/models/request" @@ -30,13 +28,13 @@ func partitionWorkBatch(peers []peer.ID, requestID string, req request.ExecuteBa assignments := make(map[peer.ID]*request.WorkOrderBatch) for _, peer := range peers { - strandID := newStrandID(requestID) - assignments[peer] = req.WorkOrderBatch(requestID, strandID, a[peer]...) + chunkID := newChunkID() + assignments[peer] = req.WorkOrderBatch(requestID, chunkID, a[peer]...) 
} return assignments } -func newStrandID(requestID string) string { - return fmt.Sprintf("%v:%v", requestID, newRequestID()) +func newChunkID() string { + return newUUID() } diff --git a/node/message.go b/node/message.go index a696269d..0cc23ba5 100644 --- a/node/message.go +++ b/node/message.go @@ -5,7 +5,7 @@ import ( "encoding/json" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/hashicorp/go-multierror" pubsub "github.com/libp2p/go-libp2p-pubsub" "github.com/libp2p/go-libp2p/core/peer" diff --git a/node/process.go b/node/process.go index 73e01cd2..8d5b1317 100644 --- a/node/process.go +++ b/node/process.go @@ -5,7 +5,7 @@ import ( "encoding/json" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" otelcodes "go.opentelemetry.io/otel/codes" diff --git a/node/run.go b/node/run.go index d5631444..3d086140 100644 --- a/node/run.go +++ b/node/run.go @@ -8,7 +8,7 @@ import ( "io" "sync" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" pubsub "github.com/libp2p/go-libp2p-pubsub" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" diff --git a/node/telemetry_params.go b/node/telemetry_params.go index 86f26beb..c260fa47 100644 --- a/node/telemetry_params.go +++ b/node/telemetry_params.go @@ -3,7 +3,7 @@ package node import ( "fmt" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) // Tracing span names. 
diff --git a/node/worker/roll_call.go b/node/worker/roll_call.go index c2eb09af..b89f93c4 100644 --- a/node/worker/roll_call.go +++ b/node/worker/roll_call.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "github.com/blessnetwork/b7s/consensus" diff --git a/node/worker/telemetry_params.go b/node/worker/telemetry_params.go index 5ef16c44..2f4b8adf 100644 --- a/node/worker/telemetry_params.go +++ b/node/worker/telemetry_params.go @@ -1,18 +1,21 @@ package worker import ( - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) // Tracing span names. const ( - spanWorkOrder = "WorkOrder" + spanWorkOrder = "WorkOrder" + spanWorkOrderBatch = "WorkOrderBatch" + spanExecute = "Execute" ) var ( rollCallsSeenMetric = []string{"node", "rollcalls", "seen"} rollCallsAppliedMetric = []string{"node", "rollcalls", "applied"} workOrderMetric = []string{"node", "workorders"} + workOrderBatchesMetric = []string{"node", "workorder_batches"} ) var Counters = []prometheus.CounterDefinition{ diff --git a/node/worker/work_order.go b/node/worker/work_order.go index ef5e633f..67234f3b 100644 --- a/node/worker/work_order.go +++ b/node/worker/work_order.go @@ -6,7 +6,7 @@ import ( "fmt" "time" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "go.opentelemetry.io/otel/trace" @@ -70,6 +70,9 @@ func (w *Worker) processWorkOrder(ctx context.Context, from peer.ID, req request func (w *Worker) execute(ctx context.Context, requestID string, timestamp time.Time, req execute.Request, from peer.ID) (codes.Code, execute.Result, error) { + ctx, span := w.Tracer().Start(ctx, spanExecute, trace.WithAttributes(tracing.ExecutionAttributes(requestID, req)...)) + defer span.End() + // Check if we have function in store. 
functionInstalled, err := w.fstore.IsInstalled(req.FunctionID) if err != nil { diff --git a/node/worker/work_order_batch.go b/node/worker/work_order_batch.go index a3845d93..daea026f 100644 --- a/node/worker/work_order_batch.go +++ b/node/worker/work_order_batch.go @@ -5,29 +5,26 @@ import ( "fmt" "time" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" + "go.opentelemetry.io/otel/trace" "github.com/blessnetwork/b7s/models/execute" "github.com/blessnetwork/b7s/models/request" "github.com/blessnetwork/b7s/models/response" + "github.com/blessnetwork/b7s/telemetry/b7ssemconv" ) -// TODO: Perhaps move this and keep it in a single place. -type StrandResult struct { - FunctionInvocation string - Arguments []string - Result execute.Result - Metadata any -} - func (w *Worker) processWorkOrderBatch(ctx context.Context, from peer.ID, req request.WorkOrderBatch) error { + w.Metrics().IncrCounterWithLabels(workOrderBatchesMetric, 1, []metrics.Label{{Name: "function", Value: req.Template.FunctionID}}) + requestID := req.RequestID - strandID := req.StrandID + chunkID := req.ChunkID log := w.Log().With(). Str("request", requestID). - Str("strand", strandID). + Str("chunk", chunkID). Str("function", req.Template.FunctionID). Logger() @@ -36,20 +33,27 @@ func (w *Worker) processWorkOrderBatch(ctx context.Context, from peer.ID, req re Uint("concurrency", req.ConcurrencyLimit). 
Msg("received a batch work order") - // TODO: Handle parallelism + ctx, span := w.Tracer().Start(ctx, spanWorkOrderBatch, trace.WithAttributes( + b7ssemconv.FunctionCID.String(req.Template.FunctionID), + b7ssemconv.FunctionMethod.String(req.Template.Method), + b7ssemconv.ExecutionNodeCount.Int(req.Template.Config.NodeCount), + b7ssemconv.ExecutionRequestID.String(requestID), + )) + defer span.End() + + // NOTE: We might want to execute these in parallel in the future results := make(map[execute.RequestHash]*response.BatchFunctionResult) for _, args := range req.Arguments { - // TODO: Fill this in. er := execute.Request{ FunctionID: req.Template.FunctionID, Method: req.Template.Method, Config: req.Template.Config, Arguments: args, } - _, result, err := w.execute(ctx, req.StrandID, time.Now(), er, from) + _, result, err := w.execute(ctx, req.ChunkID, time.Now(), er, from) if err != nil { log.Error().Err(err).Stringer("peer", from).Msg("execution failed") } @@ -59,9 +63,9 @@ func (w *Worker) processWorkOrderBatch(ctx context.Context, from peer.ID, req re log.Error().Err(err).Msg("could not get metadata from the execution result") } - chunkID := execute.GetExecutionID(er) + chunkID := er.GetExecutionID() results[chunkID] = &response.BatchFunctionResult{ - FunctionInvocation: execute.FunctionInvocation(er), + FunctionInvocation: execute.FunctionInvocation(er.FunctionID, er.Method), Arguments: args, NodeResult: execute.NodeResult{ Result: result, @@ -72,7 +76,7 @@ func (w *Worker) processWorkOrderBatch(ctx context.Context, from peer.ID, req re res := response.WorkOrderBatch{ RequestID: req.RequestID, - StrandID: req.StrandID, + ChunkID: req.ChunkID, Results: results, } err := w.Send(ctx, from, res) diff --git a/node/worker/worker.go b/node/worker/worker.go index 5d7a96ed..6ad92ff1 100644 --- a/node/worker/worker.go +++ b/node/worker/worker.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" 
"github.com/blessnetwork/b7s/info" "github.com/blessnetwork/b7s/models/bls" diff --git a/stores/batch-store/ibs/batch_store.go b/stores/batch-store/ibs/batch_store.go new file mode 100644 index 00000000..f4101122 --- /dev/null +++ b/stores/batch-store/ibs/batch_store.go @@ -0,0 +1,362 @@ +package ibs + +import ( + "context" + "errors" + "sync" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" +) + +var _ batchstore.Store = (*BatchStore)(nil) + +type BatchStore struct { + *sync.RWMutex + + batches map[string]*batchstore.ExecuteBatchRecord + chunks map[string]*batchstore.ChunkRecord + items map[string]*batchstore.WorkItemRecord +} + +func NewBatchStore() *BatchStore { + bs := &BatchStore{ + RWMutex: &sync.RWMutex{}, + batches: make(map[string]*batchstore.ExecuteBatchRecord), + chunks: make(map[string]*batchstore.ChunkRecord), + items: make(map[string]*batchstore.WorkItemRecord), + } + + return bs +} + +func (s *BatchStore) CreateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + s.Lock() + defer s.Unlock() + + s.batches[rec.ID] = rec + return nil +} + +func (s *BatchStore) GetBatch(ctx context.Context, id string) (*batchstore.ExecuteBatchRecord, error) { + s.RLock() + defer s.RUnlock() + + rec, ok := s.batches[id] + if !ok { + return nil, errors.New("batch not found") + } + + return rec, nil +} + +func (s *BatchStore) UpdateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + s.Lock() + defer s.Unlock() + + s.batches[rec.ID] = rec + return nil +} + +func (s *BatchStore) UpdateBatchStatus(ctx context.Context, status int32, id string) error { + s.Lock() + defer s.Unlock() + + _, ok := s.batches[id] + if !ok { + return errors.New("batch not found") + } + + s.batches[id].Status = status + return nil +} + +func (s *BatchStore) DeleteBatch(ctx context.Context, id string) error { + s.Lock() + defer s.Unlock() + + delete(s.batches, id) + + return nil +} + +func (s *BatchStore) FindBatches(ctx context.Context, statuses 
...int32) ([]*batchstore.ExecuteBatchRecord, error) { + s.RLock() + defer s.RUnlock() + + lookup := make(map[int32]struct{}) + for _, s := range statuses { + lookup[s] = struct{}{} + } + + var batches []*batchstore.ExecuteBatchRecord + for _, batch := range s.batches { + + if len(lookup) > 0 { + _, ok := lookup[batch.Status] + if !ok { + continue + } + } + + batches = append(batches, batch) + } + + return batches, nil +} + +func (s *BatchStore) CreateChunks(ctx context.Context, chunks ...*batchstore.ChunkRecord) error { + s.Lock() + defer s.Unlock() + + for _, chunk := range chunks { + s.chunks[chunk.ID] = chunk + } + + return nil +} + +func (s *BatchStore) GetChunk(ctx context.Context, id string) (*batchstore.ChunkRecord, error) { + s.RLock() + defer s.RUnlock() + + rec, ok := s.chunks[id] + if !ok { + return nil, errors.New("chunk not found") + } + + return rec, nil +} + +func (s *BatchStore) GetBatchChunks(ctx context.Context, batchID string) ([]*batchstore.ChunkRecord, error) { + s.RLock() + defer s.RUnlock() + + var results []*batchstore.ChunkRecord + for _, chunk := range s.chunks { + if chunk.BatchID == batchID { + results = append(results, chunk) + } + } + + return results, nil +} + +func (s *BatchStore) UpdateChunk(ctx context.Context, rec *batchstore.ChunkRecord) error { + s.Lock() + defer s.Unlock() + + _, ok := s.chunks[rec.ID] + if !ok { + return errors.New("chunk not found") + } + + s.chunks[rec.ID] = rec + + return nil +} + +func (s *BatchStore) UpdateChunkStatus(ctx context.Context, status int32, ids ...string) error { + s.Lock() + defer s.Unlock() + + for _, id := range ids { + + _, ok := s.chunks[id] + if !ok { + return errors.New("chunk not found") + } + + s.chunks[id].Status = status + } + + return nil +} + +func (s *BatchStore) DeleteChunks(ctx context.Context, ids ...string) error { + s.Lock() + defer s.Unlock() + + for _, id := range ids { + delete(s.chunks, id) + } + + return nil +} + +func (s *BatchStore) CreateWorkItems(ctx 
context.Context, items ...*batchstore.WorkItemRecord) error { + s.Lock() + defer s.Unlock() + + for _, rec := range items { + s.items[rec.ID] = rec + } + + return nil +} + +func (s *BatchStore) GetWorkItem(ctx context.Context, id string) (*batchstore.WorkItemRecord, error) { + s.RLock() + defer s.RUnlock() + + rec, ok := s.items[id] + if !ok { + return nil, errors.New("work item not found") + } + + return rec, nil +} + +func (s *BatchStore) UpdateWorkItem(ctx context.Context, rec *batchstore.WorkItemRecord) error { + s.Lock() + defer s.Unlock() + + _, ok := s.items[rec.ID] + if !ok { + return errors.New("work item not found") + } + + s.items[rec.ID] = rec + + return nil +} + +func (s *BatchStore) UpdateWorkItemStatus(ctx context.Context, status int32, ids ...string) error { + s.Lock() + defer s.Unlock() + + for _, id := range ids { + + _, ok := s.items[id] + if !ok { + return errors.New("work item not found") + } + + s.items[id].Status = status + } + + return nil +} + +func (s *BatchStore) UpdateWorkItemsOutput(ctx context.Context, statuses map[string]batchstore.WorkItemStatus) error { + + s.Lock() + defer s.Unlock() + + for id, status := range statuses { + + _, ok := s.items[id] + if !ok { + return errors.New("work item not found") + } + + s.items[id].Status = int32(status.Status) + s.items[id].Output = status.Output + } + + return nil +} + +func (s *BatchStore) DeleteWorkItems(ctx context.Context, ids ...string) error { + s.Lock() + defer s.Unlock() + + for _, id := range ids { + delete(s.items, id) + } + + return nil +} + +func (s *BatchStore) AssignWorkItems(ctx context.Context, chunkID string, ids ...string) error { + s.Lock() + defer s.Unlock() + + for _, id := range ids { + _, ok := s.items[id] + if !ok { + return errors.New("item not found") + } + + s.items[id].ChunkID = chunkID + } + + return nil +} + +func (s *BatchStore) FindWorkItems(ctx context.Context, batchID string, chunkID string, statuses ...int32) ([]*batchstore.WorkItemRecord, error) { + 
s.RLock() + defer s.RUnlock() + + lookup := make(map[int32]struct{}) + for _, s := range statuses { + lookup[s] = struct{}{} + } + + var results []*batchstore.WorkItemRecord + for _, item := range s.items { + + if batchID != "" && item.BatchID != batchID { + continue + } + + if chunkID != "" && item.ChunkID != chunkID { + continue + } + + if len(lookup) > 0 { + _, ok := lookup[item.Status] + if !ok { + continue + } + } + + results = append(results, item) + } + + return results, nil +} + +func (s *BatchStore) FindChunks(ctx context.Context, batchID string, statuses ...int32) ([]*batchstore.ChunkRecord, error) { + s.RLock() + defer s.RUnlock() + + if batchID == "" { + return nil, errors.New("batch ID is required") + } + + lookup := make(map[int32]struct{}) + for _, s := range statuses { + lookup[s] = struct{}{} + } + + var results []*batchstore.ChunkRecord + for _, chunk := range s.chunks { + + if len(lookup) > 0 { + _, ok := lookup[chunk.Status] + if !ok { + continue + } + } + + results = append(results, chunk) + } + + return results, nil +} + +// func (s *BatchStore) dumpData(w io.Writer, msg string) { +// s.RLock() +// defer s.RUnlock() +// +// rec := map[string]any{ +// "msg": msg, +// "batches": s.batches, +// "chunks": s.chunks, +// "work_items": s.items, +// } +// +// data, _ := json.MarshalIndent(rec, "", "\t") +// +// fmt.Fprintf(w, "%s\n", data) +// } diff --git a/stores/batch-store/ibs/doc.go b/stores/batch-store/ibs/doc.go new file mode 100644 index 00000000..66a8e110 --- /dev/null +++ b/stores/batch-store/ibs/doc.go @@ -0,0 +1,2 @@ +// In-Memory Batch Store (ibs) is a BatchStore which stores batch requests in-memory. 
+package ibs diff --git a/stores/batch-store/mbs/batch.go b/stores/batch-store/mbs/batch.go new file mode 100644 index 00000000..5fcf51c0 --- /dev/null +++ b/stores/batch-store/mbs/batch.go @@ -0,0 +1,112 @@ +package mbs + +import ( + "context" + "fmt" + "time" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "go.mongodb.org/mongo-driver/v2/bson" +) + +// TODO: Handle timestamps correctly. + +func (s *BatchStore) CreateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + + _, err := s.batches.InsertOne(ctx, rec) + if err != nil { + return fmt.Errorf("could not save batch: %w", err) + } + + return nil +} + +func (s *BatchStore) GetBatch(ctx context.Context, id string) (*batchstore.ExecuteBatchRecord, error) { + + var rec batchstore.ExecuteBatchRecord + err := s.batches.FindOne( + ctx, + bson.M{"id": id}, + ).Decode(&rec) + if err != nil { + return nil, fmt.Errorf("could not retrieve batch: %w", err) + } + + return &rec, nil +} + +func (s *BatchStore) UpdateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + + // modding input record + rec.UpdatedAt = time.Now().UTC() + + _, err := s.batches.UpdateOne( + ctx, + bson.M{"id": rec.ID}, + bson.M{"$set": rec}, + ) + if err != nil { + return fmt.Errorf("could not update batch: %w", err) + } + + return nil +} + +func (s *BatchStore) UpdateBatchStatus(ctx context.Context, status int32, id string) error { + + _, err := s.batches.UpdateOne( + ctx, + bson.M{"id": id}, + bson.M{"$set": bson.M{ + "status": status, + "updated_at": time.Now().UTC(), + }}, + ) + if err != nil { + return fmt.Errorf("could not update batch status: %w", err) + } + + return nil +} + +func (s *BatchStore) DeleteBatch(ctx context.Context, id string) error { + + _, err := s.batches.DeleteOne( + ctx, + bson.M{"id": id}, + ) + if err != nil { + return fmt.Errorf("could not delete batch: %w", err) + } + + return nil +} + +func (s *BatchStore) FindBatches(ctx context.Context, statuses ...int32) 
([]*batchstore.ExecuteBatchRecord, error) { + + query := make(map[string]any) + + sn := len(statuses) + if sn == 1 { + // Exact match for status + query["status"] = statuses[0] + } else if sn > 1 { + // We have a list of statuses. + query["status"] = map[string]any{ + "$in": statuses, + } + } + + cursor, err := s.batches.Find(ctx, query) + if err != nil { + return nil, fmt.Errorf("could not lookup batches: %w", err) + } + + var batches []*batchstore.ExecuteBatchRecord + err = cursor.All(ctx, &batches) + if err != nil { + return nil, fmt.Errorf("could not decode found batches: %w", err) + } + + return batches, nil +} diff --git a/stores/batch-store/mbs/batch_store.go b/stores/batch-store/mbs/batch_store.go new file mode 100644 index 00000000..7b24d2c7 --- /dev/null +++ b/stores/batch-store/mbs/batch_store.go @@ -0,0 +1,102 @@ +package mbs + +import ( + "context" + "errors" + "fmt" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +const ( + mongoNamespaceExistsCode = 48 +) + +type BatchStore struct { + cfg Config + cli *mongo.Client + + batches *mongo.Collection + chunks *mongo.Collection + items *mongo.Collection +} + +func NewBatchStore(cli *mongo.Client, opts ...OptionFunc) (*BatchStore, error) { + + cfg := defaultConfig + for _, opt := range opts { + opt(&cfg) + } + + store := &BatchStore{ + cfg: cfg, + cli: cli, + } + + return store, nil +} + +func (s *BatchStore) Init(ctx context.Context) error { + + if s.cfg.initCollections { + err := s.createCollections(ctx) + if err != nil { + return fmt.Errorf("could not create collections: %w", err) + } + } + + db := s.cli.Database(s.cfg.dbname) + s.batches = db.Collection(batchesCollection) + s.chunks = db.Collection(chunksCollection) + s.items = db.Collection(workItemCollection) + + return nil +} + +func (s *BatchStore) createCollections(ctx context.Context) error { + + // TODO: Chunk or worker association? 
+ collections := map[string][]byte{ + batchesCollection: batchCollectionSchema, + chunksCollection: chunkCollectionSchema, + workItemCollection: workItemCollectionSchema, + } + + for collection, schema := range collections { + + var compiled bson.M + err := bson.UnmarshalExtJSON(schema, true, &compiled) + if err != nil { + return fmt.Errorf("invalid collection schema definition (collection: %v): %w", collection, err) + } + + options := options.CreateCollection().SetValidator(compiled) + + err = s.cli.Database(s.cfg.dbname).CreateCollection(ctx, collection, options) + + // TODO: Honour config option. + + // NOTE: Because of how mongo (or the go driver) checks if the collection options are identical, we are a little less strict here. + // Schema gets compiled to an unordered map and checked against the existing collection + if err != nil && !isNamespaceExists(err) { + return fmt.Errorf("could not create collection: %w", err) + } + } + + return nil +} + +func isNamespaceExists(err error) bool { + var se mongo.ServerError + if !errors.As(err, &se) { + return false + } + + if se.HasErrorCode(mongoNamespaceExistsCode) { + return true + } + + return false +} diff --git a/stores/batch-store/mbs/batch_test.go b/stores/batch-store/mbs/batch_test.go new file mode 100644 index 00000000..8a1e31e2 --- /dev/null +++ b/stores/batch-store/mbs/batch_test.go @@ -0,0 +1,103 @@ +//go:build integration +// +build integration + +package mbs_test + +import ( + "fmt" + "math/rand/v2" + "os" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + "go.mongodb.org/mongo-driver/v2/mongo" + + b7smongo "github.com/blessnetwork/b7s/mongo" + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "github.com/blessnetwork/b7s/stores/batch-store/mbs" +) + +const ( + MongoDBConnectionEnv = "B7S_MONGO_DB_ADDRESS" + MongoDBName = "B7S_MONGO_DB_NAME" +) + +func TestBatchStore_Batch(t *testing.T) { + + var ( + client = getDBClient(t) + ctx = t.Context() + ) + + store, err 
:= mbs.NewBatchStore(client) + require.NoError(t, err) + + err = store.Init(ctx) + require.NoError(t, err) + + batch := batchstore.ExecuteBatchRecord{ + ID: uuid.New().String(), + CID: "test-cid", + Method: "method.wasm", + Status: 0, + } + + t.Run("create", func(t *testing.T) { + + err = store.CreateBatch(ctx, &batch) + require.NoError(t, err) + }) + t.Run("get", func(t *testing.T) { + + id := batch.ID + retrieved, err := store.GetBatch(ctx, id) + require.NoError(t, err) + require.Equal(t, batch, *retrieved) + }) + t.Run("update", func(t *testing.T) { + + copy := batch + copy.CID = batch.CID + fmt.Sprint(rand.Int()) + + err = store.UpdateBatch(ctx, &copy) + require.NoError(t, err) + + retrieved, err := store.GetBatch(ctx, copy.ID) + require.NoError(t, err) + + require.Equal(t, copy, *retrieved) + }) + t.Run("update status", func(t *testing.T) { + + status := rand.Int32N(11) + + err = store.UpdateBatchStatus(ctx, status, batch.ID) + require.NoError(t, err) + + retrieved, err := store.GetBatch(ctx, batch.ID) + require.NoError(t, err) + + require.Equal(t, status, retrieved.Status) + // TODO: Remaining fields should be unchanged equal. 
+ }) + t.Run("delete", func(t *testing.T) { + + err = store.DeleteBatch(ctx, batch.ID) + require.NoError(t, err) + + _, err := store.GetBatch(ctx, batch.ID) + require.Error(t, err) + }) +} + +func getDBClient(t *testing.T) *mongo.Client { + t.Helper() + + addr := os.Getenv(MongoDBConnectionEnv) + + client, err := b7smongo.Connect(t.Context(), addr) + require.NoError(t, err) + + return client +} diff --git a/stores/batch-store/mbs/chunk.go b/stores/batch-store/mbs/chunk.go new file mode 100644 index 00000000..1e9fbee5 --- /dev/null +++ b/stores/batch-store/mbs/chunk.go @@ -0,0 +1,118 @@ +package mbs + +import ( + "context" + "errors" + "fmt" + "time" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "go.mongodb.org/mongo-driver/v2/bson" +) + +func (s *BatchStore) CreateChunks(ctx context.Context, chunks ...*batchstore.ChunkRecord) error { + + _, err := s.chunks.InsertMany(ctx, chunks) + if err != nil { + return fmt.Errorf("could not save chunk: %w", err) + } + + return nil +} + +func (s *BatchStore) GetChunk(ctx context.Context, id string) (*batchstore.ChunkRecord, error) { + + var rec batchstore.ChunkRecord + err := s.chunks.FindOne( + ctx, + bson.M{"id": id}, + ).Decode(&rec) + if err != nil { + return nil, fmt.Errorf("could not retrieve chunk: %w", err) + } + + return &rec, nil +} + +func (s *BatchStore) GetBatchChunks(ctx context.Context, batchID string) ([]*batchstore.ChunkRecord, error) { + return nil, errors.New("TBD: not implemented") +} + +func (s *BatchStore) UpdateChunk(ctx context.Context, rec *batchstore.ChunkRecord) error { + + // modding input record + rec.UpdatedAt = time.Now().UTC() + + _, err := s.chunks.UpdateOne( + ctx, + bson.M{"id": rec.ID}, + bson.M{"$set": rec}, + ) + if err != nil { + return fmt.Errorf("could not update chunk: %w", err) + } + + return nil +} + +func (s *BatchStore) UpdateChunkStatus(ctx context.Context, status int32, ids ...string) error { + + _, err := s.chunks.UpdateMany( + ctx, + bson.M{"id": bson.M{"$in": 
ids}}, + bson.M{"$set": bson.M{ + "status": status, + "updated_at": time.Now().UTC(), + }}, + ) + if err != nil { + return fmt.Errorf("could not update chunk: %w", err) + } + + return nil +} + +func (s *BatchStore) DeleteChunks(ctx context.Context, ids ...string) error { + + _, err := s.chunks.DeleteMany( + ctx, + bson.M{"id": bson.M{"$in": ids}}, + ) + if err != nil { + return fmt.Errorf("could not delete chunk: %w", err) + } + + return nil +} + +func (s *BatchStore) FindChunks(ctx context.Context, batchID string, statuses ...int32) ([]*batchstore.ChunkRecord, error) { + + if batchID == "" { + return nil, errors.New("batch ID is required") + } + + query := make(map[string]any) + query["batch_id"] = batchID + + sn := len(statuses) + if sn == 1 { + query["status"] = statuses[0] + } else if sn > 1 { + query["status"] = map[string]any{ + "$in": statuses, + } + } + + cursor, err := s.chunks.Find(ctx, query) + if err != nil { + return nil, fmt.Errorf("could not lookup chunks: %w", err) + } + + var chunks []*batchstore.ChunkRecord + err = cursor.All(ctx, &chunks) + if err != nil { + return nil, fmt.Errorf("could not decode found chunks: %w", err) + } + + return chunks, nil +} diff --git a/stores/batch-store/mbs/chunk_test.go b/stores/batch-store/mbs/chunk_test.go new file mode 100644 index 00000000..b870f004 --- /dev/null +++ b/stores/batch-store/mbs/chunk_test.go @@ -0,0 +1,127 @@ +//go:build integration +// +build integration + +package mbs_test + +import ( + "fmt" + "math/rand/v2" + "testing" + + "github.com/stretchr/testify/require" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "github.com/blessnetwork/b7s/stores/batch-store/mbs" +) + +func TestBatchStore_Chunks(t *testing.T) { + + var ( + client = getDBClient(t) + ctx = t.Context() + recordCount = 10 + ) + + store, err := mbs.NewBatchStore(client) + require.NoError(t, err) + + err = store.Init(ctx) + require.NoError(t, err) + + chunks := newChunks(t, recordCount) + ids := make([]string, 
len(chunks)) + for i, chunk := range chunks { + ids[i] = chunk.ID + } + + t.Run("create single chunk", func(t *testing.T) { + err = store.CreateChunks(ctx, chunks[0]) + require.NoError(t, err) + }) + t.Run("create many chunks", func(t *testing.T) { + err = store.CreateChunks(ctx, chunks[1:]...) + require.NoError(t, err) + }) + t.Run("get chunk", func(t *testing.T) { + + for i, chunk := range chunks { + + id := chunk.ID + + retrieved, err := store.GetChunk(ctx, id) + require.NoError(t, err) + require.Equal(t, chunks[i], retrieved) + } + + }) + t.Run("update", func(t *testing.T) { + + orig := chunks[0] + + copy := *orig + copy.BatchID = copy.BatchID + fmt.Sprint(rand.Int32N(10)) + + err = store.UpdateChunk(ctx, ©) + require.NoError(t, err) + + retrieved, err := store.GetChunk(ctx, copy.ID) + require.NoError(t, err) + + require.Equal(t, copy, *retrieved) + }) + t.Run("update status", func(t *testing.T) { + + var ( + chunkID = ids[0] + status = rand.Int32() + ) + + err = store.UpdateChunkStatus(ctx, status, chunkID) + require.NoError(t, err) + + // Verify change of first chunk. + retrieved, err := store.GetChunk(ctx, chunkID) + require.NoError(t, err) + require.Equal(t, status, retrieved.Status) + }) + t.Run("update multiple statuses", func(t *testing.T) { + + var ( + status = rand.Int32() + ) + + err = store.UpdateChunkStatus(ctx, status, ids...) + require.NoError(t, err) + + for _, id := range ids { + retrieved, err := store.GetChunk(ctx, id) + require.NoError(t, err) + require.Equal(t, status, retrieved.Status) + } + }) + t.Run("delete chunks", func(t *testing.T) { + + err = store.DeleteChunks(ctx, ids...) + require.NoError(t, err) + + for _, chunk := range chunks { + // Retrieving chunks should fail as they are deleted by now. 
+ _, err := store.GetChunk(ctx, chunk.ID) + require.Error(t, err) + } + }) +} + +func newChunks(t *testing.T, n int) []*batchstore.ChunkRecord { + + chunks := make([]*batchstore.ChunkRecord, n) + for i := range n { + chunks[i] = &batchstore.ChunkRecord{ + ID: fmt.Sprintf("test.chunk-%v", rand.Int()), + BatchID: fmt.Sprintf("test-request-id-%v", rand.Int()), + Status: 0, + } + } + + return chunks +} diff --git a/stores/batch-store/mbs/config.go b/stores/batch-store/mbs/config.go new file mode 100644 index 00000000..1e446030 --- /dev/null +++ b/stores/batch-store/mbs/config.go @@ -0,0 +1,33 @@ +package mbs + +type Config struct { + dbname string + initCollections bool + ignoreInitNamespaceErrors bool +} + +var defaultConfig = Config{ + dbname: "b7s-db", + initCollections: true, + ignoreInitNamespaceErrors: true, +} + +type OptionFunc func(*Config) + +func DBName(name string) OptionFunc { + return func(cfg *Config) { + cfg.dbname = name + } +} + +func InitCollections(b bool) OptionFunc { + return func(cfg *Config) { + cfg.initCollections = b + } +} + +func IgnoreInitErrors(b bool) OptionFunc { + return func(cfg *Config) { + cfg.ignoreInitNamespaceErrors = b + } +} diff --git a/stores/batch-store/mbs/doc.go b/stores/batch-store/mbs/doc.go new file mode 100644 index 00000000..054b1777 --- /dev/null +++ b/stores/batch-store/mbs/doc.go @@ -0,0 +1,2 @@ +// Mongo Batch Store (mbs) is a BatchStore which uses MongoDB for underlying storage. 
+package mbs diff --git a/stores/batch-store/mbs/schema.go b/stores/batch-store/mbs/schema.go new file mode 100644 index 00000000..c3cec2d2 --- /dev/null +++ b/stores/batch-store/mbs/schema.go @@ -0,0 +1,28 @@ +package mbs + +import ( + _ "embed" +) + +const ( + batchesCollection = "b7s-batches" + chunksCollection = "b7s-batch-chunks" + workItemCollection = "b7s-batch-work-items" +) + +// Collections: +// - batches +// - chunks +// - work items + +// TODO: Model the timestamps +// TODO: Timestamps should be mandatory + +//go:embed validation/batches.json +var batchCollectionSchema []byte + +//go:embed validation/chunks.json +var chunkCollectionSchema []byte + +//go:embed validation/work_items.json +var workItemCollectionSchema []byte diff --git a/stores/batch-store/mbs/validation/batches.json b/stores/batch-store/mbs/validation/batches.json new file mode 100644 index 00000000..007fb6db --- /dev/null +++ b/stores/batch-store/mbs/validation/batches.json @@ -0,0 +1,28 @@ +{ + "$jsonSchema": { + "bsonType": "object", + "required": [ + "id", + "cid", + "method" + ], + "properties": { + "id": { + "bsonType": "string", + "description": "b7s execution request ID" + }, + "cid": { + "bsonType": "string", + "description": "IPFS CID of the b7s function to execute" + }, + "method": { + "bsonType": "string", + "description": "Method of the b7s function to execute" + }, + "status": { + "bsonType": "int", + "description": "status of this execution request" + } + } + } +} \ No newline at end of file diff --git a/stores/batch-store/mbs/validation/chunks.json b/stores/batch-store/mbs/validation/chunks.json new file mode 100644 index 00000000..c53975ee --- /dev/null +++ b/stores/batch-store/mbs/validation/chunks.json @@ -0,0 +1,23 @@ +{ + "$jsonSchema": { + "bsonType": "object", + "required": [ + "id", + "batch_id" + ], + "properties": { + "id": { + "bsonType": "string", + "description": "chunk ID" + }, + "batch_id": { + "bsonType": "string", + "description": "ID of the parent 
execution request" + }, + "status": { + "bsonType": "int", + "description": "status of this execution chunk" + } + } + } +} \ No newline at end of file diff --git a/stores/batch-store/mbs/validation/work_items.json b/stores/batch-store/mbs/validation/work_items.json new file mode 100644 index 00000000..6d87632e --- /dev/null +++ b/stores/batch-store/mbs/validation/work_items.json @@ -0,0 +1,35 @@ +{ + "$jsonSchema": { + "bsonType": "object", + "required": [ + "id", + "batch_id" + ], + "properties": { + "id": { + "bsonType": "string", + "description": "work item ID" + }, + "batch_id": { + "bsonType": "string", + "description": "ID of the parent execution request" + }, + "chunk_id": { + "bsonType": "string", + "description": "ID of the chunk this item belongs to" + }, + "arguments": { + "bsonType": "array", + "description": "CLI arguments for b7s function" + }, + "status": { + "bsonType": "int", + "description": "status of this execution item" + }, + "attempts": { + "bsonType": "int", + "description": "number of execution attempts" + } + } + } +} \ No newline at end of file diff --git a/stores/batch-store/mbs/work_item.go b/stores/batch-store/mbs/work_item.go new file mode 100644 index 00000000..d1d93101 --- /dev/null +++ b/stores/batch-store/mbs/work_item.go @@ -0,0 +1,178 @@ +package mbs + +import ( + "context" + "fmt" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" +) + +func (s *BatchStore) CreateWorkItems(ctx context.Context, rec ...*batchstore.WorkItemRecord) error { + + _, err := s.items.InsertMany(ctx, rec) + if err != nil { + return fmt.Errorf("could not insert work item: %w", err) + } + + return nil +} + +func (s *BatchStore) GetWorkItem(ctx context.Context, id string) (*batchstore.WorkItemRecord, error) { + + var item batchstore.WorkItemRecord + err := s.items.FindOne( + ctx, + bson.M{"id": id}, + 
).Decode(&item) + if err != nil { + return nil, fmt.Errorf("could not retrieve work item: %w", err) + } + + return &item, nil +} + +func (s *BatchStore) UpdateWorkItem(ctx context.Context, rec *batchstore.WorkItemRecord) error { + + // modding input record + rec.UpdatedAt = time.Now().UTC() + + _, err := s.items.UpdateOne( + ctx, + bson.M{"id": rec.ID}, + bson.M{"$set": rec}, + ) + if err != nil { + return fmt.Errorf("could not update work item: %w", err) + } + + return nil +} + +func (s *BatchStore) UpdateWorkItemStatus(ctx context.Context, status int32, ids ...string) error { + + query := bson.M{"$set": bson.M{ + "status": status, + "updated_at": time.Now().UTC(), + }} + + if status == batchstore.StatusFailed { + query["$inc"] = bson.M{ + "attempts": 1, + } + } + + _, err := s.items.UpdateMany( + ctx, + bson.M{"id": bson.M{"$in": ids}}, + query, + ) + if err != nil { + return fmt.Errorf("could not update work item: %w", err) + } + + return nil +} + +func (s *BatchStore) UpdateWorkItemsOutput(ctx context.Context, statuses map[string]batchstore.WorkItemStatus) error { + + ts := time.Now().UTC() + + var models []mongo.WriteModel + + for id, s := range statuses { + + update := mongo.NewUpdateOneModel(). + SetFilter(bson.M{"id": id}). 
+ SetUpdate(bson.M{ + "$set": bson.M{ + "output": s.Output, + "status": s.Status, + "updated_at": ts, + }}) + + models = append(models, update) + } + + opts := options.BulkWrite().SetOrdered(false) + + _, err := s.items.BulkWrite(ctx, models, opts) + if err != nil { + return fmt.Errorf("could not update work items: %w", err) + } + + return nil +} + +func (s *BatchStore) FindWorkItems(ctx context.Context, batchID string, chunkID string, statuses ...int32) ([]*batchstore.WorkItemRecord, error) { + + query := make(map[string]any) + + if batchID != "" { + query["batch_id"] = batchID + } + + if chunkID != "" { + query["chunk_id"] = chunkID + } + + sn := len(statuses) + if sn == 1 { + // Exact match for status + query["status"] = statuses[0] + } else if sn > 1 { + // We have a list of statuses. + query["status"] = map[string]any{ + "$in": statuses, + } + } + + cursor, err := s.items.Find(ctx, query) + if err != nil { + return nil, fmt.Errorf("could not lookup items: %w", err) + } + + var items []*batchstore.WorkItemRecord + err = cursor.All(ctx, &items) + if err != nil { + return nil, fmt.Errorf("could not decode found work items: %w", err) + } + + return items, nil +} + +func (s *BatchStore) DeleteWorkItems(ctx context.Context, ids ...string) error { + + _, err := s.items.DeleteMany( + ctx, + bson.M{"id": bson.M{"$in": ids}}, + ) + if err != nil { + return fmt.Errorf("could not delete work item: %w", err) + } + + return nil +} + +// TODO: If there's too many IDs query can get troubling and we might need to consider chunking up the input list. 
+func (s *BatchStore) AssignWorkItems(ctx context.Context, chunkID string, ids ...string) error { + + _, err := s.items.UpdateMany( + ctx, + bson.M{"id": bson.M{"$in": ids}}, + bson.M{"$set": bson.M{ + "chunk_id": chunkID, + "status": batchstore.StatusInProgress, + "updated_at": time.Now().UTC(), + }}, + ) + if err != nil { + return fmt.Errorf("could not assign work item: %w", err) + } + + return nil +} diff --git a/stores/batch-store/mbs/work_item_test.go b/stores/batch-store/mbs/work_item_test.go new file mode 100644 index 00000000..cf2662a9 --- /dev/null +++ b/stores/batch-store/mbs/work_item_test.go @@ -0,0 +1,128 @@ +//go:build integration +// +build integration + +package mbs_test + +import ( + "fmt" + "math/rand/v2" + "testing" + + "github.com/stretchr/testify/require" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" + "github.com/blessnetwork/b7s/stores/batch-store/mbs" +) + +func TestBatchStore_WorkItem(t *testing.T) { + + var ( + client = getDBClient(t) + ctx = t.Context() + itemCount = 10 + ) + + store, err := mbs.NewBatchStore(client) + require.NoError(t, err) + + err = store.Init(ctx) + require.NoError(t, err) + + items := newWorkItems(t, itemCount) + ids := make([]string, len(items)) + for i, item := range items { + ids[i] = item.ID + } + t.Run("create single items", func(t *testing.T) { + err = store.CreateWorkItems(ctx, items[0]) + require.NoError(t, err) + }) + t.Run("create multiple items items", func(t *testing.T) { + err = store.CreateWorkItems(ctx, items[1:]...) 
+ require.NoError(t, err) + }) + t.Run("get work items", func(t *testing.T) { + + for i, item := range items { + + id := item.ID + retrieved, err := store.GetWorkItem(ctx, id) + require.NoError(t, err) + require.Equal(t, items[i], retrieved) + + } + }) + t.Run("update", func(t *testing.T) { + + orig := items[0] + copy := *orig + copy.BatchID = copy.BatchID + fmt.Sprint(rand.Int32N(10)) + + err = store.UpdateWorkItem(ctx, ©) + require.NoError(t, err) + + retrieved, err := store.GetWorkItem(ctx, copy.ID) + require.NoError(t, err) + + require.Equal(t, copy.BatchID, retrieved.BatchID) + require.True(t, retrieved.UpdatedAt.After(orig.UpdatedAt)) + }) + t.Run("update status", func(t *testing.T) { + + var ( + itemID = ids[0] + status = rand.Int32N(11) + ) + + err = store.UpdateWorkItemStatus(ctx, status, itemID) + require.NoError(t, err) + + retrieved, err := store.GetWorkItem(ctx, itemID) + require.NoError(t, err) + + require.Equal(t, status, retrieved.Status) + }) + t.Run("update multiple statuses", func(t *testing.T) { + + var ( + status = rand.Int32N(11) + ) + + err = store.UpdateWorkItemStatus(ctx, status, ids...) + require.NoError(t, err) + + for _, item := range items { + + id := item.ID + retrieved, err := store.GetWorkItem(ctx, id) + require.NoError(t, err) + require.Equal(t, status, retrieved.Status) + } + }) + t.Run("delete items", func(t *testing.T) { + + err = store.DeleteWorkItems(ctx, ids...) 
+ require.NoError(t, err) + + for _, item := range items { + _, err := store.GetWorkItem(ctx, item.ID) + require.Error(t, err) + } + }) +} + +func newWorkItems(t *testing.T, n int) []*batchstore.WorkItemRecord { + t.Helper() + + items := make([]*batchstore.WorkItemRecord, n) + for i := range n { + items[i] = &batchstore.WorkItemRecord{ + ID: fmt.Sprintf("test.work-item-%v", rand.Int()), + ChunkID: fmt.Sprintf("test.chunk-%v", rand.Int()), + BatchID: fmt.Sprintf("test-request-id-%v", rand.Int()), + Status: 0, + } + } + + return items +} diff --git a/stores/batch-store/models.go b/stores/batch-store/models.go new file mode 100644 index 00000000..52d35ec9 --- /dev/null +++ b/stores/batch-store/models.go @@ -0,0 +1,47 @@ +package batchstore + +import ( + "time" + + "github.com/blessnetwork/b7s/models/execute" +) + +type ExecuteBatchRecord struct { + ID string `bson:"id,omitempty"` + CID string `bson:"cid,omitempty"` + Method string `bson:"method,omitempty"` + Config Config `bson:"config,omitempty"` + MaxAttempts uint32 `bson:"max_attempts,omitempty"` + Status int32 `bson:"status"` + CreatedAt time.Time `bson:"created_at,omitempty"` + UpdatedAt time.Time `bson:"updated_at,omitempty"` +} + +// NOTE: Pulling this in as a dependency to avoid duplicate models, though I don't like the import. +type Config = execute.Config + +type ChunkRecord struct { + ID string `bson:"id,omitempty"` + BatchID string `bson:"batch_id,omitempty"` + Worker string `bson:"worker,omitempty"` + Status int32 `bson:"status"` + CreatedAt time.Time `bson:"created_at,omitempty"` + UpdatedAt time.Time `bson:"updated_at,omitempty"` +} + +type WorkItemRecord struct { + ID string `bson:"id,omitempty"` + BatchID string `bson:"batch_id,omitempty"` // Technically not mandatory here, but is good to have locality of data. 
+ ChunkID string `bson:"chunk_id,omitempty"` + Arguments []string `bson:"arguments,omitempty"` + Status int32 `bson:"status"` + Attempts uint32 `bson:"attempts,omitempty"` + Output string `bson:"output,omitempty"` + CreatedAt time.Time `bson:"created_at,omitempty"` + UpdatedAt time.Time `bson:"updated_at,omitempty"` +} + +type WorkItemStatus struct { + Output string + Status Status +} diff --git a/stores/batch-store/store.go b/stores/batch-store/store.go new file mode 100644 index 00000000..343038eb --- /dev/null +++ b/stores/batch-store/store.go @@ -0,0 +1,51 @@ +package batchstore + +import ( + "context" +) + +type Status int32 + +const ( + StatusCreated = 0 + StatusInProgress = 1 + StatusFailed = -1 + StatusPermanentlyFailed = -2 + StatusDone = 100 +) + +type Store interface { + BatchStore + ChunkStore + WorkItemStore +} + +type BatchStore interface { + CreateBatch(ctx context.Context, rec *ExecuteBatchRecord) error + GetBatch(ctx context.Context, id string) (*ExecuteBatchRecord, error) + UpdateBatch(ctx context.Context, rec *ExecuteBatchRecord) error + UpdateBatchStatus(ctx context.Context, status int32, id string) error + DeleteBatch(ctx context.Context, id string) error + FindBatches(ctx context.Context, statuses ...int32) ([]*ExecuteBatchRecord, error) +} + +type ChunkStore interface { + CreateChunks(ctx context.Context, rec ...*ChunkRecord) error + GetChunk(ctx context.Context, id string) (*ChunkRecord, error) + GetBatchChunks(ctx context.Context, batchID string) ([]*ChunkRecord, error) + UpdateChunk(ctx context.Context, rec *ChunkRecord) error + UpdateChunkStatus(ctx context.Context, status int32, ids ...string) error + DeleteChunks(ctx context.Context, ids ...string) error + FindChunks(ctx context.Context, batchID string, statuses ...int32) ([]*ChunkRecord, error) +} + +type WorkItemStore interface { + CreateWorkItems(ctx context.Context, rec ...*WorkItemRecord) error + GetWorkItem(ctx context.Context, id string) (*WorkItemRecord, error) + 
UpdateWorkItem(ctx context.Context, rec *WorkItemRecord) error + UpdateWorkItemStatus(ctx context.Context, status int32, ids ...string) error + UpdateWorkItemsOutput(ctx context.Context, statuses map[string]WorkItemStatus) error + DeleteWorkItems(ctx context.Context, ids ...string) error + AssignWorkItems(ctx context.Context, chunkID string, ids ...string) error + FindWorkItems(ctx context.Context, batchID string, chunkID string, statuses ...int32) ([]*WorkItemRecord, error) +} diff --git a/store/codec.go b/stores/store/codec.go similarity index 100% rename from store/codec.go rename to stores/store/codec.go diff --git a/store/codec/json.go b/stores/store/codec/json.go similarity index 100% rename from store/codec/json.go rename to stores/store/codec/json.go diff --git a/store/key.go b/stores/store/key.go similarity index 100% rename from store/key.go rename to stores/store/key.go diff --git a/store/key_test.go b/stores/store/key_test.go similarity index 100% rename from store/key_test.go rename to stores/store/key_test.go diff --git a/store/keys.go b/stores/store/keys.go similarity index 100% rename from store/keys.go rename to stores/store/keys.go diff --git a/store/params.go b/stores/store/params.go similarity index 100% rename from store/params.go rename to stores/store/params.go diff --git a/store/remove.go b/stores/store/remove.go similarity index 100% rename from store/remove.go rename to stores/store/remove.go diff --git a/store/retrieve.go b/stores/store/retrieve.go similarity index 100% rename from store/retrieve.go rename to stores/store/retrieve.go diff --git a/store/save.go b/stores/store/save.go similarity index 100% rename from store/save.go rename to stores/store/save.go diff --git a/store/store.go b/stores/store/store.go similarity index 100% rename from store/store.go rename to stores/store/store.go diff --git a/store/store_test.go b/stores/store/store_test.go similarity index 98% rename from store/store_test.go rename to 
stores/store/store_test.go index e1c07f45..0f8d8778 100644 --- a/store/store_test.go +++ b/stores/store/store_test.go @@ -11,8 +11,8 @@ import ( "github.com/stretchr/testify/require" "github.com/blessnetwork/b7s/models/bls" - "github.com/blessnetwork/b7s/store" - "github.com/blessnetwork/b7s/store/codec" + "github.com/blessnetwork/b7s/stores/store" + "github.com/blessnetwork/b7s/stores/store/codec" "github.com/blessnetwork/b7s/testing/helpers" "github.com/blessnetwork/b7s/testing/mocks" ) diff --git a/store/traceable/params.go b/stores/store/traceable/params.go similarity index 100% rename from store/traceable/params.go rename to stores/store/traceable/params.go diff --git a/store/traceable/traceable.go b/stores/store/traceable/traceable.go similarity index 98% rename from store/traceable/traceable.go rename to stores/store/traceable/traceable.go index 08043c99..3d642ffb 100644 --- a/store/traceable/traceable.go +++ b/stores/store/traceable/traceable.go @@ -8,7 +8,7 @@ import ( "go.opentelemetry.io/otel/trace" "github.com/blessnetwork/b7s/models/bls" - "github.com/blessnetwork/b7s/store" + "github.com/blessnetwork/b7s/stores/store" "github.com/blessnetwork/b7s/telemetry/b7ssemconv" "github.com/blessnetwork/b7s/telemetry/tracing" ) diff --git a/telemetry/b7ssemconv/semconv.go b/telemetry/b7ssemconv/semconv.go index f748c66c..7bcbcd5c 100644 --- a/telemetry/b7ssemconv/semconv.go +++ b/telemetry/b7ssemconv/semconv.go @@ -20,8 +20,9 @@ const ( ) const ( - FunctionCID = attribute.Key("function.cid") - FunctionMethod = attribute.Key("function.method") + FunctionCID = attribute.Key("function.cid") + FunctionMethod = attribute.Key("function.method") + FunctionArguments = attribute.Key("function.arguments") ) const ( diff --git a/telemetry/metrics.go b/telemetry/metrics.go index 55971c36..c7d1d8e8 100644 --- a/telemetry/metrics.go +++ b/telemetry/metrics.go @@ -5,8 +5,8 @@ import ( "net/http" "time" - "github.com/armon/go-metrics" - mp 
"github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics" + mp "github.com/hashicorp/go-metrics/prometheus" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" diff --git a/telemetry/metrics_config.go b/telemetry/metrics_config.go index 38126105..b2a64220 100644 --- a/telemetry/metrics_config.go +++ b/telemetry/metrics_config.go @@ -1,7 +1,7 @@ package telemetry import ( - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" ) var DefaultMetricsConfig = MetricsConfig{ diff --git a/telemetry/metrics_config_internal_test.go b/telemetry/metrics_config_internal_test.go index fbd70cd0..f5108a03 100644 --- a/telemetry/metrics_config_internal_test.go +++ b/telemetry/metrics_config_internal_test.go @@ -3,7 +3,7 @@ package telemetry import ( "testing" - "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-metrics/prometheus" "github.com/stretchr/testify/require" ) diff --git a/telemetry/metrics_test.go b/telemetry/metrics_test.go index 9bf4a510..f609a837 100644 --- a/telemetry/metrics_test.go +++ b/telemetry/metrics_test.go @@ -4,7 +4,7 @@ import ( "strings" "testing" - mp "github.com/armon/go-metrics/prometheus" + mp "github.com/hashicorp/go-metrics/prometheus" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index cbbef300..d333bf28 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -5,8 +5,8 @@ import ( "errors" "fmt" - "github.com/armon/go-metrics" "github.com/go-logr/zerologr" + "github.com/hashicorp/go-metrics" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" "go.opentelemetry.io/otel" diff --git a/telemetry/tracing/tracer.go b/telemetry/tracing/tracer.go index 370efd54..371c7173 100644 --- a/telemetry/tracing/tracer.go +++ b/telemetry/tracing/tracer.go @@ -2,6 +2,7 
@@ package tracing import ( "context" + "strings" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -50,6 +51,7 @@ func ExecutionAttributes(requestID string, req execute.Request) []attribute.KeyV return []attribute.KeyValue{ b7ssemconv.FunctionCID.String(req.FunctionID), b7ssemconv.FunctionMethod.String(req.Method), + b7ssemconv.FunctionArguments.String(strings.Join(req.Arguments, " ")), b7ssemconv.ExecutionNodeCount.Int(req.Config.NodeCount), b7ssemconv.ExecutionConsensus.String(req.Config.ConsensusAlgorithm), b7ssemconv.ExecutionRequestID.String(requestID), diff --git a/testing/mocks/batch_store.go b/testing/mocks/batch_store.go new file mode 100644 index 00000000..988627cb --- /dev/null +++ b/testing/mocks/batch_store.go @@ -0,0 +1,194 @@ +package mocks + +import ( + "context" + "testing" + + batchstore "github.com/blessnetwork/b7s/stores/batch-store" +) + +var _ batchstore.Store = (*BatchStore)(nil) + +type BatchStore struct { + CreateBatchFunc func(context.Context, *batchstore.ExecuteBatchRecord) error + GetBatchFunc func(context.Context, string) (*batchstore.ExecuteBatchRecord, error) + UpdateBatchFunc func(context.Context, *batchstore.ExecuteBatchRecord) error + UpdateBatchStatusFunc func(context.Context, int32, string) error + DeleteBatchFunc func(context.Context, string) error + FindBatchesFunc func(context.Context, ...int32) ([]*batchstore.ExecuteBatchRecord, error) + + CreateChunksFunc func(context.Context, ...*batchstore.ChunkRecord) error + GetChunkFunc func(context.Context, string) (*batchstore.ChunkRecord, error) + GetBatchChunksFunc func(context.Context, string) ([]*batchstore.ChunkRecord, error) + UpdateChunkFunc func(context.Context, *batchstore.ChunkRecord) error + UpdateChunkStatusFunc func(context.Context, int32, ...string) error + DeleteChunksFunc func(context.Context, ...string) error + FindChunksFunc func(context.Context, string, ...int32) ([]*batchstore.ChunkRecord, error) + + CreateWorkItemsFunc func(context.Context, 
...*batchstore.WorkItemRecord) error + GetWorkItemFunc func(context.Context, string) (*batchstore.WorkItemRecord, error) + UpdateWorkItemFunc func(context.Context, *batchstore.WorkItemRecord) error + UpdateWorkItemStatusFunc func(context.Context, int32, ...string) error + UpdateWorkItemsOutputFunc func(context.Context, map[string]batchstore.WorkItemStatus) error + DeleteWorkItemsFunc func(context.Context, ...string) error + AssignWorkItemsFunc func(context.Context, string, ...string) error + FindWorkItemsFunc func(context.Context, string, string, ...int32) ([]*batchstore.WorkItemRecord, error) +} + +// TODO: Add actual types to be returned, not nils + +func BaselineMockStore(t *testing.T) *BatchStore { + t.Helper() + + return &BatchStore{ + CreateBatchFunc: func(context.Context, *batchstore.ExecuteBatchRecord) error { + return nil + }, + GetBatchFunc: func(context.Context, string) (*batchstore.ExecuteBatchRecord, error) { + return nil, nil + }, + UpdateBatchFunc: func(context.Context, *batchstore.ExecuteBatchRecord) error { + return nil + }, + UpdateBatchStatusFunc: func(context.Context, int32, string) error { + return nil + }, + DeleteBatchFunc: func(context.Context, string) error { + return nil + }, + FindBatchesFunc: func(context.Context, ...int32) ([]*batchstore.ExecuteBatchRecord, error) { + return nil, nil + }, + + CreateChunksFunc: func(context.Context, ...*batchstore.ChunkRecord) error { + return nil + }, + GetChunkFunc: func(context.Context, string) (*batchstore.ChunkRecord, error) { + return nil, nil + }, + GetBatchChunksFunc: func(context.Context, string) ([]*batchstore.ChunkRecord, error) { + return nil, nil + }, + UpdateChunkFunc: func(context.Context, *batchstore.ChunkRecord) error { + return nil + }, + UpdateChunkStatusFunc: func(context.Context, int32, ...string) error { + return nil + }, + DeleteChunksFunc: func(context.Context, ...string) error { + return nil + }, + FindChunksFunc: func(context.Context, string, ...int32) 
([]*batchstore.ChunkRecord, error) { + return nil, nil + }, + + CreateWorkItemsFunc: func(context.Context, ...*batchstore.WorkItemRecord) error { + return nil + }, + GetWorkItemFunc: func(context.Context, string) (*batchstore.WorkItemRecord, error) { + return nil, nil + }, + UpdateWorkItemFunc: func(context.Context, *batchstore.WorkItemRecord) error { + return nil + }, + UpdateWorkItemStatusFunc: func(context.Context, int32, ...string) error { + return nil + }, + UpdateWorkItemsOutputFunc: func(context.Context, map[string]batchstore.WorkItemStatus) error { + return nil + }, + DeleteWorkItemsFunc: func(context.Context, ...string) error { + return nil + }, + AssignWorkItemsFunc: func(context.Context, string, ...string) error { + return nil + }, + FindWorkItemsFunc: func(context.Context, string, string, ...int32) ([]*batchstore.WorkItemRecord, error) { + return nil, nil + }, + } +} + +func (m *BatchStore) CreateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + return m.CreateBatchFunc(ctx, rec) +} + +func (m *BatchStore) GetBatch(ctx context.Context, id string) (*batchstore.ExecuteBatchRecord, error) { + return m.GetBatchFunc(ctx, id) +} + +func (m *BatchStore) UpdateBatch(ctx context.Context, rec *batchstore.ExecuteBatchRecord) error { + return m.UpdateBatchFunc(ctx, rec) +} + +func (m *BatchStore) UpdateBatchStatus(ctx context.Context, status int32, id string) error { + return m.UpdateBatchStatusFunc(ctx, status, id) +} + +func (m *BatchStore) DeleteBatch(ctx context.Context, id string) error { + return m.DeleteBatchFunc(ctx, id) +} + +func (m *BatchStore) FindBatches(ctx context.Context, statuses ...int32) ([]*batchstore.ExecuteBatchRecord, error) { + return m.FindBatchesFunc(ctx, statuses...) +} + +func (m *BatchStore) CreateChunks(ctx context.Context, rec ...*batchstore.ChunkRecord) error { + return m.CreateChunksFunc(ctx, rec...) 
+} + +func (m *BatchStore) GetChunk(ctx context.Context, id string) (*batchstore.ChunkRecord, error) { + return m.GetChunkFunc(ctx, id) +} + +func (m *BatchStore) GetBatchChunks(ctx context.Context, batchID string) ([]*batchstore.ChunkRecord, error) { + return m.GetBatchChunksFunc(ctx, batchID) +} + +func (m *BatchStore) UpdateChunk(ctx context.Context, rec *batchstore.ChunkRecord) error { + return m.UpdateChunkFunc(ctx, rec) +} + +func (m *BatchStore) UpdateChunkStatus(ctx context.Context, status int32, ids ...string) error { + return m.UpdateChunkStatusFunc(ctx, status, ids...) +} + +func (m *BatchStore) DeleteChunks(ctx context.Context, ids ...string) error { + return m.DeleteChunksFunc(ctx, ids...) +} + +func (m *BatchStore) FindChunks(ctx context.Context, batchID string, statuses ...int32) ([]*batchstore.ChunkRecord, error) { + return m.FindChunksFunc(ctx, batchID, statuses...) +} + +func (m *BatchStore) CreateWorkItems(ctx context.Context, rec ...*batchstore.WorkItemRecord) error { + return m.CreateWorkItemsFunc(ctx, rec...) +} + +func (m *BatchStore) GetWorkItem(ctx context.Context, id string) (*batchstore.WorkItemRecord, error) { + return m.GetWorkItemFunc(ctx, id) +} + +func (m *BatchStore) UpdateWorkItem(ctx context.Context, rec *batchstore.WorkItemRecord) error { + return m.UpdateWorkItemFunc(ctx, rec) +} + +func (m *BatchStore) UpdateWorkItemStatus(ctx context.Context, status int32, ids ...string) error { + return m.UpdateWorkItemStatusFunc(ctx, status, ids...) +} + +func (m *BatchStore) UpdateWorkItemsOutput(ctx context.Context, statuses map[string]batchstore.WorkItemStatus) error { + return m.UpdateWorkItemsOutputFunc(ctx, statuses) +} + +func (m *BatchStore) DeleteWorkItems(ctx context.Context, ids ...string) error { + return m.DeleteWorkItemsFunc(ctx, ids...) +} + +func (m *BatchStore) AssignWorkItems(ctx context.Context, chunkID string, ids ...string) error { + return m.AssignWorkItemsFunc(ctx, chunkID, ids...) 
+} + +func (m *BatchStore) FindWorkItems(ctx context.Context, batchID string, chunkID string, statuses ...int32) ([]*batchstore.WorkItemRecord, error) { + return m.FindWorkItemsFunc(ctx, batchID, chunkID, statuses...) +} diff --git a/testing/mocks/core.go b/testing/mocks/core.go index 67ba9f62..d09e097c 100644 --- a/testing/mocks/core.go +++ b/testing/mocks/core.go @@ -4,7 +4,7 @@ import ( "context" "testing" - "github.com/armon/go-metrics" + "github.com/hashicorp/go-metrics" "github.com/libp2p/go-libp2p/core/peer" "github.com/prometheus/client_golang/prometheus" "github.com/rs/zerolog" diff --git a/testing/mocks/generic.go b/testing/mocks/generic.go index 7900981d..55a11df8 100644 --- a/testing/mocks/generic.go +++ b/testing/mocks/generic.go @@ -13,6 +13,7 @@ import ( "github.com/blessnetwork/b7s/models/bls" "github.com/blessnetwork/b7s/models/codes" "github.com/blessnetwork/b7s/models/execute" + "github.com/blessnetwork/b7s/models/response" ) // Global variables that can be used for testing. They are valid non-nil values for commonly needed types. 
@@ -108,4 +109,47 @@ var ( Archive: "/var/tmp/archive.tar.gz", Files: "/var/tmp/files", } + + GenericBatchExecutionResult = &response.ExecuteBatch{ + RequestID: GenericUUID.String(), + Code: codes.OK, + Chunks: map[string]response.NodeChunkResults{ + "generic-chunk-id-1": { + Peer: GenericPeerIDs[0], + Results: map[execute.RequestHash]*response.BatchFunctionResult{ + "dummy-request-hash-1": { + NodeResult: execute.NodeResult{ + Result: GenericExecutionResult, + }, + FunctionInvocation: execute.FunctionInvocation(GenericExecutionRequest.FunctionID, GenericExecutionRequest.Method), + Arguments: []string{"a", "b", "c"}, + }, + }, + }, + "generic-chunk-id-2": { + Peer: GenericPeerIDs[1], + Results: map[execute.RequestHash]*response.BatchFunctionResult{ + "dummy-request-hash-2": { + NodeResult: execute.NodeResult{ + Result: GenericExecutionResult, + }, + FunctionInvocation: execute.FunctionInvocation(GenericExecutionRequest.FunctionID, GenericExecutionRequest.Method), + Arguments: []string{"d", "e", "f"}, + }, + }, + }, + "generic-chunk-id-3": { + Peer: GenericPeerIDs[2], + Results: map[execute.RequestHash]*response.BatchFunctionResult{ + "dummy-request-hash-3": { + NodeResult: execute.NodeResult{ + Result: GenericExecutionResult, + }, + FunctionInvocation: execute.FunctionInvocation(GenericExecutionRequest.FunctionID, GenericExecutionRequest.Method), + Arguments: []string{"x", "y", "z"}, + }, + }, + }, + }, + } ) diff --git a/testing/mocks/node.go b/testing/mocks/node.go index 2d7399eb..ae655348 100644 --- a/testing/mocks/node.go +++ b/testing/mocks/node.go @@ -12,10 +12,11 @@ import ( // APINode implements the `Node` interface expected by the API. 
type APINode struct { - ExecuteFunctionFunc func(context.Context, execute.Request, string) (codes.Code, string, execute.ResultMap, execute.Cluster, error) - ExecuteFunctionBatchFunc func(context.Context, request.ExecuteBatch) (*response.ExecuteBatch, error) - ExecutionResultFunc func(id string) (execute.ResultMap, bool) - PublishFunctionInstallFunc func(ctx context.Context, uri string, cid string, subgroup string) error + ExecuteFunctionFunc func(context.Context, execute.Request, string) (codes.Code, string, execute.ResultMap, execute.Cluster, error) + StartFunctionBatchExecutionFunc func(context.Context, request.ExecuteBatch) (string, error) + GetBatchResultsFunc func(context.Context, string) (*response.ExecuteBatch, error) + ExecutionResultFunc func(id string) (execute.ResultMap, bool) + PublishFunctionInstallFunc func(ctx context.Context, uri string, cid string, subgroup string) error } func BaselineNode(t *testing.T) *APINode { @@ -24,12 +25,23 @@ func BaselineNode(t *testing.T) *APINode { node := APINode{ ExecuteFunctionFunc: func(context.Context, execute.Request, string) (codes.Code, string, execute.ResultMap, execute.Cluster, error) { - // TODO: Add a generic cluster info - return GenericExecutionResult.Code, GenericUUID.String(), GenericExecutionResultMap, execute.Cluster{}, nil + var ( + code = GenericExecutionResult.Code + uuid = GenericUUID.String() + result = GenericExecutionResultMap + cluster = execute.Cluster{ + Main: GenericPeerIDs[0], + Peers: GenericPeerIDs[:4], + } + ) + + return code, uuid, result, cluster, nil + }, + StartFunctionBatchExecutionFunc: func(context.Context, request.ExecuteBatch) (string, error) { + return "", nil }, - ExecuteFunctionBatchFunc: func(context.Context, request.ExecuteBatch) (*response.ExecuteBatch, error) { - // TODO: Return success by default. 
- return nil, GenericError + GetBatchResultsFunc: func(context.Context, string) (*response.ExecuteBatch, error) { + return nil, nil }, ExecutionResultFunc: func(id string) (execute.ResultMap, bool) { return GenericExecutionResultMap, true @@ -46,8 +58,12 @@ func (n *APINode) ExecuteFunction(ctx context.Context, req execute.Request, subg return n.ExecuteFunctionFunc(ctx, req, subgroup) } -func (n *APINode) ExecuteFunctionBatch(ctx context.Context, req request.ExecuteBatch) (*response.ExecuteBatch, error) { - return n.ExecuteFunctionBatchFunc(ctx, req) +func (n *APINode) StartFunctionBatchExecution(ctx context.Context, req request.ExecuteBatch) (string, error) { + return n.StartFunctionBatchExecutionFunc(ctx, req) +} + +func (n *APINode) GetBatchResults(ctx context.Context, id string) (*response.ExecuteBatch, error) { + return n.GetBatchResultsFunc(ctx, id) } func (n *APINode) ExecutionResult(id string) (execute.ResultMap, bool) {