feat: add --action-retries option to ci#461
feat: add --action-retries option to ci#461jake-kramer wants to merge 4 commits intografana:mainfrom
--action-retries option to ci#461Conversation
c7fd2fb to
2d3d453
Compare
--action-retries option to cio--action-retries option to cio
follow up to grafana#450
2d3d453 to
ed87209
Compare
--action-retries option to cio--action-retries option to ci
guicaulada
left a comment
There was a problem hiding this comment.
Nice work! Thank you for putting this together! I thought we would eventually need this for the ci command as well. The implementation looks good and follows the same approach.
I left a couple minor suggestions to make the code a little more robust and easier to maintain... these are not blocking, just improvements to consider.
Let me know if you have any questions!
cmd/wait-for-github/ci.go
Outdated
| func (ci checkAllCI) Check(ctx context.Context) error { | ||
| // tryRerunFailedWorkflows attempts to rerun failed workflows if retries are available. | ||
| // Returns true if we should continue waiting (workflows were rerun or rerun failed temporarily). | ||
| func (ci *checkAllCI) tryRerunFailedWorkflows(ctx context.Context) bool { |
There was a problem hiding this comment.
This method is nearly identical to the one in pr.go, I think it would be better to extract the shared logic so it's easier to maintain and there are no divergences in the future.
guicaulada
left a comment
There was a problem hiding this comment.
Thank you for working on my previous suggestions!
I noticed a minor type mismatch due to the cmd.Int function returning int64 and left a suggestion regarding the tests structure to remove duplication.
Let me know what you think!
| ref: ref, | ||
| checks: cmd.StringSlice("check"), | ||
| excludes: cmd.StringSlice("exclude"), | ||
| actionRetries: cmd.Int("action-retries"), |
There was a problem hiding this comment.
There's a type mismatch here, cmd.Int returns int64, both on prConfig and ciConfig we have actionRetries int and on pr.go we use:
| actionRetries: cmd.Int("action-retries"), | |
| actionRetries: int(cmd.Int("action-retries")), |
| func TestCheckAllCIActionRetries(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| actionRetries int | ||
| rerunCount int | ||
| rerunError error | ||
| expectedExitCode *int | ||
| }{ | ||
| { | ||
| name: "CI failed with action-retries, workflows rerun", | ||
| actionRetries: 2, | ||
| rerunCount: 1, | ||
| rerunError: nil, | ||
| expectedExitCode: nil, // Should continue waiting after rerun | ||
| }, | ||
| { | ||
| name: "CI failed with action-retries, no workflows to rerun", | ||
| actionRetries: 2, | ||
| rerunCount: 0, | ||
| rerunError: nil, | ||
| expectedExitCode: &one, // Should fail immediately | ||
| }, | ||
| { | ||
| name: "CI failed with action-retries, rerun error continues waiting", | ||
| actionRetries: 2, | ||
| rerunCount: 0, | ||
| rerunError: cli.Exit("rerun failed", 1), | ||
| expectedExitCode: nil, // Should continue waiting and retry later | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| fakeCIStatusChecker := &FakeCIStatusChecker{ | ||
| status: github.CIStatusFailed, | ||
| RerunCount: tt.rerunCount, | ||
| RerunError: tt.rerunError, | ||
| } | ||
| cfg := &config{ | ||
| recheckInterval: 1, | ||
| logger: testLogger, | ||
| } | ||
| ciConf := &ciConfig{ | ||
| owner: "owner", | ||
| repo: "repo", | ||
| ref: "ref", | ||
| actionRetries: tt.actionRetries, | ||
| } | ||
|
|
||
| ctx, cancel := context.WithTimeout(context.Background(), 1) | ||
| cancel() | ||
|
|
||
| err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf) | ||
|
|
||
| if tt.expectedExitCode != nil { | ||
| var exitErr cli.ExitCoder | ||
| require.ErrorAs(t, err, &exitErr) | ||
| require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode()) | ||
| } else { | ||
| // Context expired before CI could complete, which is expected | ||
| // since we cancelled the context immediately | ||
| require.Error(t, err) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestCheckSpecificCIActionRetries(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| actionRetries int | ||
| rerunCount int | ||
| rerunError error | ||
| expectedExitCode *int | ||
| }{ | ||
| { | ||
| name: "CI failed with action-retries, workflows rerun", | ||
| actionRetries: 2, | ||
| rerunCount: 1, | ||
| rerunError: nil, | ||
| expectedExitCode: nil, // Should continue waiting after rerun | ||
| }, | ||
| { | ||
| name: "CI failed with action-retries, no workflows to rerun", | ||
| actionRetries: 2, | ||
| rerunCount: 0, | ||
| rerunError: nil, | ||
| expectedExitCode: &one, // Should fail immediately | ||
| }, | ||
| { | ||
| name: "CI failed with action-retries, rerun error continues waiting", | ||
| actionRetries: 2, | ||
| rerunCount: 0, | ||
| rerunError: cli.Exit("rerun failed", 1), | ||
| expectedExitCode: nil, // Should continue waiting and retry later | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| fakeCIStatusChecker := &FakeCIStatusChecker{ | ||
| status: github.CIStatusFailed, | ||
| RerunCount: tt.rerunCount, | ||
| RerunError: tt.rerunError, | ||
| } | ||
| cfg := &config{ | ||
| recheckInterval: 1, | ||
| logger: testLogger, | ||
| } | ||
| ciConf := &ciConfig{ | ||
| owner: "owner", | ||
| repo: "repo", | ||
| ref: "ref", | ||
| checks: []string{"check1", "check2"}, | ||
| actionRetries: tt.actionRetries, | ||
| } | ||
|
|
||
| ctx, cancel := context.WithTimeout(context.Background(), 1) | ||
| cancel() | ||
|
|
||
| err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf) | ||
|
|
||
| if tt.expectedExitCode != nil { | ||
| var exitErr cli.ExitCoder | ||
| require.ErrorAs(t, err, &exitErr) | ||
| require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode()) | ||
| } else { | ||
| // Context expired before CI could complete, which is expected | ||
| // since we cancelled the context immediately | ||
| require.Error(t, err) | ||
| } | ||
| }) | ||
| } | ||
| } |
There was a problem hiding this comment.
I noticed TestCheckAllCIActionRetries and TestCheckSpecificCIActionRetries look very similar, and the only difference between them is whether checks was populated on ciConfig. We could have a single function if we add it as a field in the test table to remove duplication.
For example:
| func TestCheckAllCIActionRetries(t *testing.T) { | |
| t.Parallel() | |
| tests := []struct { | |
| name string | |
| actionRetries int | |
| rerunCount int | |
| rerunError error | |
| expectedExitCode *int | |
| }{ | |
| { | |
| name: "CI failed with action-retries, workflows rerun", | |
| actionRetries: 2, | |
| rerunCount: 1, | |
| rerunError: nil, | |
| expectedExitCode: nil, // Should continue waiting after rerun | |
| }, | |
| { | |
| name: "CI failed with action-retries, no workflows to rerun", | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| rerunError: nil, | |
| expectedExitCode: &one, // Should fail immediately | |
| }, | |
| { | |
| name: "CI failed with action-retries, rerun error continues waiting", | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| rerunError: cli.Exit("rerun failed", 1), | |
| expectedExitCode: nil, // Should continue waiting and retry later | |
| }, | |
| } | |
| for _, tt := range tests { | |
| t.Run(tt.name, func(t *testing.T) { | |
| t.Parallel() | |
| fakeCIStatusChecker := &FakeCIStatusChecker{ | |
| status: github.CIStatusFailed, | |
| RerunCount: tt.rerunCount, | |
| RerunError: tt.rerunError, | |
| } | |
| cfg := &config{ | |
| recheckInterval: 1, | |
| logger: testLogger, | |
| } | |
| ciConf := &ciConfig{ | |
| owner: "owner", | |
| repo: "repo", | |
| ref: "ref", | |
| actionRetries: tt.actionRetries, | |
| } | |
| ctx, cancel := context.WithTimeout(context.Background(), 1) | |
| cancel() | |
| err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf) | |
| if tt.expectedExitCode != nil { | |
| var exitErr cli.ExitCoder | |
| require.ErrorAs(t, err, &exitErr) | |
| require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode()) | |
| } else { | |
| // Context expired before CI could complete, which is expected | |
| // since we cancelled the context immediately | |
| require.Error(t, err) | |
| } | |
| }) | |
| } | |
| } | |
| func TestCheckSpecificCIActionRetries(t *testing.T) { | |
| t.Parallel() | |
| tests := []struct { | |
| name string | |
| actionRetries int | |
| rerunCount int | |
| rerunError error | |
| expectedExitCode *int | |
| }{ | |
| { | |
| name: "CI failed with action-retries, workflows rerun", | |
| actionRetries: 2, | |
| rerunCount: 1, | |
| rerunError: nil, | |
| expectedExitCode: nil, // Should continue waiting after rerun | |
| }, | |
| { | |
| name: "CI failed with action-retries, no workflows to rerun", | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| rerunError: nil, | |
| expectedExitCode: &one, // Should fail immediately | |
| }, | |
| { | |
| name: "CI failed with action-retries, rerun error continues waiting", | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| rerunError: cli.Exit("rerun failed", 1), | |
| expectedExitCode: nil, // Should continue waiting and retry later | |
| }, | |
| } | |
| for _, tt := range tests { | |
| t.Run(tt.name, func(t *testing.T) { | |
| t.Parallel() | |
| fakeCIStatusChecker := &FakeCIStatusChecker{ | |
| status: github.CIStatusFailed, | |
| RerunCount: tt.rerunCount, | |
| RerunError: tt.rerunError, | |
| } | |
| cfg := &config{ | |
| recheckInterval: 1, | |
| logger: testLogger, | |
| } | |
| ciConf := &ciConfig{ | |
| owner: "owner", | |
| repo: "repo", | |
| ref: "ref", | |
| checks: []string{"check1", "check2"}, | |
| actionRetries: tt.actionRetries, | |
| } | |
| ctx, cancel := context.WithTimeout(context.Background(), 1) | |
| cancel() | |
| err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf) | |
| if tt.expectedExitCode != nil { | |
| var exitErr cli.ExitCoder | |
| require.ErrorAs(t, err, &exitErr) | |
| require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode()) | |
| } else { | |
| // Context expired before CI could complete, which is expected | |
| // since we cancelled the context immediately | |
| require.Error(t, err) | |
| } | |
| }) | |
| } | |
| } | |
| func TestCheckCIActionRetries(t *testing.T) { | |
| t.Parallel() | |
| tests := []struct { | |
| name string | |
| checks []string | |
| actionRetries int | |
| rerunCount int | |
| rerunError error | |
| expectedExitCode *int | |
| }{ | |
| { | |
| name: "All CI failed with action-retries, workflows rerun", | |
| actionRetries: 2, | |
| rerunCount: 1, | |
| expectedExitCode: nil, | |
| }, | |
| { | |
| name: "All CI failed with action-retries, no workflows to rerun", | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| expectedExitCode: &one, | |
| }, | |
| { | |
| name: "All CI failed with action-retries, rerun error continues waiting", | |
| actionRetries: 2, | |
| rerunError: cli.Exit("rerun failed", 1), | |
| expectedExitCode: nil, | |
| }, | |
| { | |
| name: "Specific CI failed with action-retries, workflows rerun", | |
| checks: []string{"check1", "check2"}, | |
| actionRetries: 2, | |
| rerunCount: 1, | |
| expectedExitCode: nil, | |
| }, | |
| { | |
| name: "Specific CI failed with action-retries, no workflows to rerun", | |
| checks: []string{"check1", "check2"}, | |
| actionRetries: 2, | |
| rerunCount: 0, | |
| expectedExitCode: &one, | |
| }, | |
| { | |
| name: "Specific CI failed with action-retries, rerun error continues waiting", | |
| checks: []string{"check1", "check2"}, | |
| actionRetries: 2, | |
| rerunError: cli.Exit("rerun failed", 1), | |
| expectedExitCode: nil, | |
| }, | |
| } | |
| for _, tt := range tests { | |
| t.Run(tt.name, func(t *testing.T) { | |
| t.Parallel() | |
| fakeCIStatusChecker := &FakeCIStatusChecker{ | |
| status: github.CIStatusFailed, | |
| RerunCount: tt.rerunCount, | |
| RerunError: tt.rerunError, | |
| } | |
| cfg := &config{ | |
| recheckInterval: 1, | |
| logger: testLogger, | |
| } | |
| ciConf := &ciConfig{ | |
| owner: "owner", | |
| repo: "repo", | |
| ref: "ref", | |
| checks: tt.checks, | |
| actionRetries: tt.actionRetries, | |
| } | |
| ctx, cancel := context.WithTimeout(context.Background(), 1) | |
| cancel() | |
| err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf) | |
| if tt.expectedExitCode != nil { | |
| var exitErr cli.ExitCoder | |
| require.ErrorAs(t, err, &exitErr) | |
| require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode()) | |
| } else { | |
| // Context expired before CI could complete, which is expected | |
| // since we cancelled the context immediately | |
| require.Error(t, err) | |
| } | |
| }) | |
| } | |
| } |
The checks field defaults to nil for the "all CI" cases, which exercises the checkAllCI path, while the cases with checks populated exercise the checkSpecificCI path.
Follow up to #450