Skip to content

feat: add --action-retries option to ci#461

Open
jake-kramer wants to merge 4 commits intografana:mainfrom
jake-kramer:action-retries-ci
Open

feat: add --action-retries option to ci#461
jake-kramer wants to merge 4 commits intografana:mainfrom
jake-kramer:action-retries-ci

Conversation

@jake-kramer
Copy link

Follow up to #450

@jake-kramer jake-kramer requested a review from a team as a code owner February 4, 2026 18:33
@jake-kramer jake-kramer force-pushed the action-retries-ci branch 2 times, most recently from c7fd2fb to 2d3d453 Compare February 4, 2026 18:43
@jake-kramer jake-kramer changed the title feat: Add --action-retries option to cio feat: add --action-retries option to cio Feb 4, 2026
@guicaulada guicaulada changed the title feat: add --action-retries option to cio feat: add --action-retries option to ci Feb 5, 2026
Copy link
Member

@guicaulada guicaulada left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice work! Thank you for putting this together! I thought we would eventually need this for the ci command as well. The implementation looks good and follows the same approach.

I left a couple minor suggestions to make the code a little more robust and easier to maintain... these are not blocking, just improvements to consider.

Let me know if you have any questions!

func (ci checkAllCI) Check(ctx context.Context) error {
// tryRerunFailedWorkflows attempts to rerun failed workflows if retries are available.
// Returns true if we should continue waiting (workflows were rerun or rerun failed temporarily).
func (ci *checkAllCI) tryRerunFailedWorkflows(ctx context.Context) bool {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is nearly identical to the one in pr.go, I think it would be better to extract the shared logic so it's easier to maintain and there are no divergences in the future.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2c278e7 is what I came up with

Copy link
Member

@guicaulada guicaulada left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for working on my previous suggestions!

I noticed a minor type mismatch due to the cmd.Int function returning int64 and left a suggestion regarding the tests structure to remove duplication.

Let me know what you think!

ref: ref,
checks: cmd.StringSlice("check"),
excludes: cmd.StringSlice("exclude"),
actionRetries: cmd.Int("action-retries"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a type mismatch here, cmd.Int returns int64, both on prConfig and ciConfig we have actionRetries int and on pr.go we use:

Suggested change
actionRetries: cmd.Int("action-retries"),
actionRetries: int(cmd.Int("action-retries")),

Comment on lines +352 to +493
func TestCheckAllCIActionRetries(t *testing.T) {
t.Parallel()

tests := []struct {
name string
actionRetries int
rerunCount int
rerunError error
expectedExitCode *int
}{
{
name: "CI failed with action-retries, workflows rerun",
actionRetries: 2,
rerunCount: 1,
rerunError: nil,
expectedExitCode: nil, // Should continue waiting after rerun
},
{
name: "CI failed with action-retries, no workflows to rerun",
actionRetries: 2,
rerunCount: 0,
rerunError: nil,
expectedExitCode: &one, // Should fail immediately
},
{
name: "CI failed with action-retries, rerun error continues waiting",
actionRetries: 2,
rerunCount: 0,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil, // Should continue waiting and retry later
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

fakeCIStatusChecker := &FakeCIStatusChecker{
status: github.CIStatusFailed,
RerunCount: tt.rerunCount,
RerunError: tt.rerunError,
}
cfg := &config{
recheckInterval: 1,
logger: testLogger,
}
ciConf := &ciConfig{
owner: "owner",
repo: "repo",
ref: "ref",
actionRetries: tt.actionRetries,
}

ctx, cancel := context.WithTimeout(context.Background(), 1)
cancel()

err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf)

if tt.expectedExitCode != nil {
var exitErr cli.ExitCoder
require.ErrorAs(t, err, &exitErr)
require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode())
} else {
// Context expired before CI could complete, which is expected
// since we cancelled the context immediately
require.Error(t, err)
}
})
}
}

func TestCheckSpecificCIActionRetries(t *testing.T) {
t.Parallel()

tests := []struct {
name string
actionRetries int
rerunCount int
rerunError error
expectedExitCode *int
}{
{
name: "CI failed with action-retries, workflows rerun",
actionRetries: 2,
rerunCount: 1,
rerunError: nil,
expectedExitCode: nil, // Should continue waiting after rerun
},
{
name: "CI failed with action-retries, no workflows to rerun",
actionRetries: 2,
rerunCount: 0,
rerunError: nil,
expectedExitCode: &one, // Should fail immediately
},
{
name: "CI failed with action-retries, rerun error continues waiting",
actionRetries: 2,
rerunCount: 0,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil, // Should continue waiting and retry later
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

fakeCIStatusChecker := &FakeCIStatusChecker{
status: github.CIStatusFailed,
RerunCount: tt.rerunCount,
RerunError: tt.rerunError,
}
cfg := &config{
recheckInterval: 1,
logger: testLogger,
}
ciConf := &ciConfig{
owner: "owner",
repo: "repo",
ref: "ref",
checks: []string{"check1", "check2"},
actionRetries: tt.actionRetries,
}

ctx, cancel := context.WithTimeout(context.Background(), 1)
cancel()

err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf)

if tt.expectedExitCode != nil {
var exitErr cli.ExitCoder
require.ErrorAs(t, err, &exitErr)
require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode())
} else {
// Context expired before CI could complete, which is expected
// since we cancelled the context immediately
require.Error(t, err)
}
})
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed TestCheckAllCIActionRetries and TestCheckSpecificCIActionRetries look very similar, and the only difference between them is whether checks was populated on ciConfig. We could have a single function if we add it as a field in the test table to remove duplication.

For example:

Suggested change
func TestCheckAllCIActionRetries(t *testing.T) {
t.Parallel()
tests := []struct {
name string
actionRetries int
rerunCount int
rerunError error
expectedExitCode *int
}{
{
name: "CI failed with action-retries, workflows rerun",
actionRetries: 2,
rerunCount: 1,
rerunError: nil,
expectedExitCode: nil, // Should continue waiting after rerun
},
{
name: "CI failed with action-retries, no workflows to rerun",
actionRetries: 2,
rerunCount: 0,
rerunError: nil,
expectedExitCode: &one, // Should fail immediately
},
{
name: "CI failed with action-retries, rerun error continues waiting",
actionRetries: 2,
rerunCount: 0,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil, // Should continue waiting and retry later
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
fakeCIStatusChecker := &FakeCIStatusChecker{
status: github.CIStatusFailed,
RerunCount: tt.rerunCount,
RerunError: tt.rerunError,
}
cfg := &config{
recheckInterval: 1,
logger: testLogger,
}
ciConf := &ciConfig{
owner: "owner",
repo: "repo",
ref: "ref",
actionRetries: tt.actionRetries,
}
ctx, cancel := context.WithTimeout(context.Background(), 1)
cancel()
err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf)
if tt.expectedExitCode != nil {
var exitErr cli.ExitCoder
require.ErrorAs(t, err, &exitErr)
require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode())
} else {
// Context expired before CI could complete, which is expected
// since we cancelled the context immediately
require.Error(t, err)
}
})
}
}
func TestCheckSpecificCIActionRetries(t *testing.T) {
t.Parallel()
tests := []struct {
name string
actionRetries int
rerunCount int
rerunError error
expectedExitCode *int
}{
{
name: "CI failed with action-retries, workflows rerun",
actionRetries: 2,
rerunCount: 1,
rerunError: nil,
expectedExitCode: nil, // Should continue waiting after rerun
},
{
name: "CI failed with action-retries, no workflows to rerun",
actionRetries: 2,
rerunCount: 0,
rerunError: nil,
expectedExitCode: &one, // Should fail immediately
},
{
name: "CI failed with action-retries, rerun error continues waiting",
actionRetries: 2,
rerunCount: 0,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil, // Should continue waiting and retry later
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
fakeCIStatusChecker := &FakeCIStatusChecker{
status: github.CIStatusFailed,
RerunCount: tt.rerunCount,
RerunError: tt.rerunError,
}
cfg := &config{
recheckInterval: 1,
logger: testLogger,
}
ciConf := &ciConfig{
owner: "owner",
repo: "repo",
ref: "ref",
checks: []string{"check1", "check2"},
actionRetries: tt.actionRetries,
}
ctx, cancel := context.WithTimeout(context.Background(), 1)
cancel()
err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf)
if tt.expectedExitCode != nil {
var exitErr cli.ExitCoder
require.ErrorAs(t, err, &exitErr)
require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode())
} else {
// Context expired before CI could complete, which is expected
// since we cancelled the context immediately
require.Error(t, err)
}
})
}
}
func TestCheckCIActionRetries(t *testing.T) {
t.Parallel()
tests := []struct {
name string
checks []string
actionRetries int
rerunCount int
rerunError error
expectedExitCode *int
}{
{
name: "All CI failed with action-retries, workflows rerun",
actionRetries: 2,
rerunCount: 1,
expectedExitCode: nil,
},
{
name: "All CI failed with action-retries, no workflows to rerun",
actionRetries: 2,
rerunCount: 0,
expectedExitCode: &one,
},
{
name: "All CI failed with action-retries, rerun error continues waiting",
actionRetries: 2,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil,
},
{
name: "Specific CI failed with action-retries, workflows rerun",
checks: []string{"check1", "check2"},
actionRetries: 2,
rerunCount: 1,
expectedExitCode: nil,
},
{
name: "Specific CI failed with action-retries, no workflows to rerun",
checks: []string{"check1", "check2"},
actionRetries: 2,
rerunCount: 0,
expectedExitCode: &one,
},
{
name: "Specific CI failed with action-retries, rerun error continues waiting",
checks: []string{"check1", "check2"},
actionRetries: 2,
rerunError: cli.Exit("rerun failed", 1),
expectedExitCode: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
fakeCIStatusChecker := &FakeCIStatusChecker{
status: github.CIStatusFailed,
RerunCount: tt.rerunCount,
RerunError: tt.rerunError,
}
cfg := &config{
recheckInterval: 1,
logger: testLogger,
}
ciConf := &ciConfig{
owner: "owner",
repo: "repo",
ref: "ref",
checks: tt.checks,
actionRetries: tt.actionRetries,
}
ctx, cancel := context.WithTimeout(context.Background(), 1)
cancel()
err := checkCIStatus(ctx, fakeCIStatusChecker, cfg, ciConf)
if tt.expectedExitCode != nil {
var exitErr cli.ExitCoder
require.ErrorAs(t, err, &exitErr)
require.Equal(t, *tt.expectedExitCode, exitErr.ExitCode())
} else {
// Context expired before CI could complete, which is expected
// since we cancelled the context immediately
require.Error(t, err)
}
})
}
}

The checks field defaults to nil for the "all CI" cases, which exercises the checkAllCI path, while the cases with checks populated exercise the checkSpecificCI path.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants