diff --git a/api/v1/common_types.go b/api/v1/common_types.go index 863336e0c..53215dbde 100644 --- a/api/v1/common_types.go +++ b/api/v1/common_types.go @@ -24,9 +24,10 @@ const ( ReasonAbsent = "Absent" // Progressing reasons - ReasonRollingOut = "RollingOut" - ReasonRetrying = "Retrying" - ReasonBlocked = "Blocked" + ReasonRollingOut = "RollingOut" + ReasonRetrying = "Retrying" + ReasonBlocked = "Blocked" + ReasonInvalidConfiguration = "InvalidConfiguration" // Deprecation reasons ReasonDeprecated = "Deprecated" diff --git a/commitchecker.yaml b/commitchecker.yaml index 95201f018..c211e7865 100644 --- a/commitchecker.yaml +++ b/commitchecker.yaml @@ -1,4 +1,4 @@ -expectedMergeBase: fbe909f7ba35a9f771da6ec0431bbde2ac45d5fb +expectedMergeBase: 492c7e5f37d17dfd9097757f845c63b82fb397dc upstreamBranch: main upstreamOrg: operator-framework upstreamRepo: operator-controller diff --git a/go.mod b/go.mod index 42f35e741..9cb05349a 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/google/go-containerregistry v0.20.7 github.com/google/renameio/v2 v2.0.2 github.com/gorilla/handlers v1.5.2 - github.com/klauspost/compress v1.18.2 + github.com/klauspost/compress v1.18.3 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 github.com/operator-framework/api v0.37.0 diff --git a/go.sum b/go.sum index 01eee4701..b67a2a0ae 100644 --- a/go.sum +++ b/go.sum @@ -323,8 +323,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= -github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= +github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= diff --git a/internal/operator-controller/applier/boxcutter.go b/internal/operator-controller/applier/boxcutter.go index f71650329..f9f9cc128 100644 --- a/internal/operator-controller/applier/boxcutter.go +++ b/internal/operator-controller/applier/boxcutter.go @@ -15,6 +15,7 @@ import ( "helm.sh/helm/v3/pkg/release" "helm.sh/helm/v3/pkg/storage/driver" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -244,8 +245,7 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste return fmt.Errorf("listing ClusterExtensionRevisions before attempting migration: %w", err) } if len(existingRevisionList.Items) != 0 { - // No migration needed. 
- return nil + return m.ensureMigratedRevisionStatus(ctx, existingRevisionList.Items) } ac, err := m.ActionClientGetter.ActionClientFor(ctx, ext) @@ -262,11 +262,36 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste return err } + // Only migrate from a Helm release that represents a deployed, working installation. + // If the latest revision is not deployed (e.g. FAILED), look through the history and + // select the most-recent deployed release instead. + if helmRelease == nil || helmRelease.Info == nil || helmRelease.Info.Status != release.StatusDeployed { + var err error + helmRelease, err = m.findLatestDeployedRelease(ac, ext.GetName()) + if err != nil { + return err + } + if helmRelease == nil { + // No deployed release found in history - skip migration. The ClusterExtension + // controller will handle this via normal rollout. + return nil + } + } + rev, err := m.RevisionGenerator.GenerateRevisionFromHelmRelease(ctx, helmRelease, ext, objectLabels) if err != nil { return err } + // Mark this revision as migrated from Helm so we can distinguish it from + // normal Boxcutter revisions. This label is critical for ensuring we only + // set Succeeded=True status on actually-migrated revisions, not on revision 1 + // created during normal Boxcutter operation. + if rev.Labels == nil { + rev.Labels = make(map[string]string) + } + rev.Labels[labels.MigratedFromHelmKey] = "true" + // Set ownerReference for proper garbage collection when the ClusterExtension is deleted. if err := controllerutil.SetControllerReference(ext, rev, m.Scheme); err != nil { return fmt.Errorf("set ownerref: %w", err) @@ -276,9 +301,105 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste return err } - // Re-fetch to get server-managed fields like Generation + // Set initial status on the migrated revision to mark it as succeeded. + // + // The revision must have a Succeeded=True status condition immediately after creation. + // + // A revision is only considered "Installed" (vs "RollingOut") when it has this condition. + // Without it, the system cannot determine what version is currently installed, which breaks: + // - Version resolution (can't compute upgrade paths from unknown starting point) + // - Status reporting (installed bundle appears as nil) + // - Subsequent upgrades (resolution fails without knowing current version) + // + // While the ClusterExtensionRevision controller would eventually reconcile and set this status, + // that creates a timing gap where the ClusterExtension reconciliation happens before the status + // is set, causing failures during the OLM upgrade window. + // + // Since we're creating this revision from a successfully deployed Helm release, we know it + // represents a working installation and can safely mark it as succeeded immediately. + return m.ensureRevisionStatus(ctx, rev) +} + +// ensureMigratedRevisionStatus checks if revision 1 exists and needs its status set. +// This handles the case where revision creation succeeded but status update failed. +// Returns nil if no action is needed. +func (m *BoxcutterStorageMigrator) ensureMigratedRevisionStatus(ctx context.Context, revisions []ocv1.ClusterExtensionRevision) error { + for i := range revisions { + if revisions[i].Spec.Revision != 1 { + continue + } + // Skip if already succeeded - status is already set correctly. 
+ if meta.IsStatusConditionTrue(revisions[i].Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { + return nil + } + // Ensure revision 1 status is set correctly, including for previously migrated + // revisions that may not carry the MigratedFromHelm label. + return m.ensureRevisionStatus(ctx, &revisions[i]) + } + // No revision 1 found - migration not applicable (revisions created by normal operation). + return nil +} + +// findLatestDeployedRelease searches the Helm release history for the most recent deployed release. +// Returns nil if no deployed release is found. +func (m *BoxcutterStorageMigrator) findLatestDeployedRelease(ac helmclient.ActionInterface, name string) (*release.Release, error) { + history, err := ac.History(name) + if errors.Is(err, driver.ErrReleaseNotFound) { + // no Helm Release history -> no prior installation. + return nil, nil + } + if err != nil { + return nil, err + } + + var latestDeployed *release.Release + for _, rel := range history { + if rel == nil || rel.Info == nil { + continue + } + if rel.Info.Status != release.StatusDeployed { + continue + } + if latestDeployed == nil || rel.Version > latestDeployed.Version { + latestDeployed = rel + } + } + + return latestDeployed, nil +} + +// ensureRevisionStatus ensures the revision has the Succeeded status condition set. +// Returns nil if the status is already set or after successfully setting it. +// Only sets status on revisions that were actually migrated from Helm (marked with MigratedFromHelmKey label). +func (m *BoxcutterStorageMigrator) ensureRevisionStatus(ctx context.Context, rev *ocv1.ClusterExtensionRevision) error { + // Re-fetch to get latest version before checking status if err := m.Client.Get(ctx, client.ObjectKeyFromObject(rev), rev); err != nil { - return fmt.Errorf("getting created revision: %w", err) + return fmt.Errorf("getting existing revision for status check: %w", err) + } + + // Only set status if this revision was actually migrated from Helm. + // This prevents us from incorrectly marking normal Boxcutter revision 1 as succeeded + // when it's still in progress. 
+ if rev.Labels[labels.MigratedFromHelmKey] != "true" { + return nil + } + + // Check if status is already set to Succeeded=True + if meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) { + return nil + } + + // Set the Succeeded status condition + meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{ + Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonSucceeded, + Message: "Revision succeeded - migrated from Helm release", + ObservedGeneration: rev.GetGeneration(), + }) + + if err := m.Client.Status().Update(ctx, rev); err != nil { + return fmt.Errorf("updating migrated revision status: %w", err) } return nil diff --git a/internal/operator-controller/applier/boxcutter_test.go b/internal/operator-controller/applier/boxcutter_test.go index 02194550f..6c27b13bd 100644 --- a/internal/operator-controller/applier/boxcutter_test.go +++ b/internal/operator-controller/applier/boxcutter_test.go @@ -19,6 +19,7 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -1210,7 +1211,12 @@ func TestBoxcutterStorageMigrator(t *testing.T) { require.NoError(t, ocv1.AddToScheme(testScheme)) brb := &mockBundleRevisionBuilder{} - mag := &mockActionGetter{} + mag := &mockActionGetter{ + currentRel: &release.Release{ + Name: "test123", + Info: &release.Info{Status: release.StatusDeployed}, + }, + } client := &clientMock{} sm := &applier.BoxcutterStorageMigrator{ RevisionGenerator: brb, @@ -1230,8 +1236,11 @@ func TestBoxcutterStorageMigrator(t *testing.T) { On("Create", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). Once(). Run(func(args mock.Arguments) { - // Simulate real Kubernetes behavior: Create() populates server-managed fields + // Verify the migration marker label is set before creation rev := args.Get(1).(*ocv1.ClusterExtensionRevision) + require.Equal(t, "true", rev.Labels[labels.MigratedFromHelmKey], "Migration marker label should be set") + + // Simulate real Kubernetes behavior: Create() populates server-managed fields rev.Generation = 1 rev.ResourceVersion = "1" }). 
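Aside: the migration marker label plus the pre-set Succeeded condition make migrated revisions easy to find after the fact. Below is a minimal sketch, assuming it lives inside this repository (the labels package is internal) and using a hypothetical helper name, of listing revisions that carry the marker and checking whether each already reports Succeeded=True.

package example

import (
	"context"

	"k8s.io/apimachinery/pkg/api/meta"
	"sigs.k8s.io/controller-runtime/pkg/client"

	ocv1 "github.com/operator-framework/operator-controller/api/v1"
	"github.com/operator-framework/operator-controller/internal/operator-controller/labels"
)

// listMigratedRevisions is a hypothetical helper: it lists ClusterExtensionRevisions
// carrying the migration marker label and reports, per revision name, whether the
// Succeeded=True condition that the migrator sets is already present.
func listMigratedRevisions(ctx context.Context, c client.Client) (map[string]bool, error) {
	var revList ocv1.ClusterExtensionRevisionList
	if err := c.List(ctx, &revList, client.MatchingLabels{labels.MigratedFromHelmKey: "true"}); err != nil {
		return nil, err
	}
	succeeded := make(map[string]bool, len(revList.Items))
	for i := range revList.Items {
		rev := &revList.Items[i]
		succeeded[rev.Name] = meta.IsStatusConditionTrue(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded)
	}
	return succeeded, nil
}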
@@ -1251,6 +1260,21 @@ func TestBoxcutterStorageMigrator(t *testing.T) { require.NoError(t, err) client.AssertExpectations(t) + + // Verify the migrated revision has Succeeded=True status with Succeeded reason and a migration message + statusWriter := client.Status().(*statusWriterMock) + require.True(t, statusWriter.updateCalled, "Status().Update() should be called during migration") + require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") + + rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) + require.True(t, ok, "Updated object should be a ClusterExtensionRevision") + + succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) + require.NotNil(t, succeededCond, "Succeeded condition should be set") + assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") + assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") + assert.Equal(t, "Revision succeeded - migrated from Helm release", succeededCond.Message, "Message should indicate Helm migration") + assert.Equal(t, int64(1), succeededCond.ObservedGeneration, "ObservedGeneration should match revision generation") }) t.Run("does not create revision when revisions exist", func(t *testing.T) { @@ -1271,12 +1295,313 @@ func TestBoxcutterStorageMigrator(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test123"}, } + existingRev := ocv1.ClusterExtensionRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-revision", + Generation: 2, + Labels: map[string]string{ + labels.MigratedFromHelmKey: "true", + }, + }, + Spec: ocv1.ClusterExtensionRevisionSpec{ + Revision: 1, // Migration creates revision 1 + }, + Status: ocv1.ClusterExtensionRevisionStatus{ + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonSucceeded, + }, + }, + }, + } + + client. + On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). + Run(func(args mock.Arguments) { + list := args.Get(1).(*ocv1.ClusterExtensionRevisionList) + list.Items = []ocv1.ClusterExtensionRevision{existingRev} + }). + Return(nil) + + err := sm.Migrate(t.Context(), ext, map[string]string{"my-label": "my-value"}) + require.NoError(t, err) + + client.AssertExpectations(t) + }) + + t.Run("sets status when revision exists but status is missing", func(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, ocv1.AddToScheme(testScheme)) + + brb := &mockBundleRevisionBuilder{} + mag := &mockActionGetter{} + client := &clientMock{} + sm := &applier.BoxcutterStorageMigrator{ + RevisionGenerator: brb, + ActionClientGetter: mag, + Client: client, + Scheme: testScheme, + } + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: "test123"}, + } + + existingRev := ocv1.ClusterExtensionRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-revision", + Generation: 2, + Labels: map[string]string{ + labels.MigratedFromHelmKey: "true", + }, + }, + Spec: ocv1.ClusterExtensionRevisionSpec{ + Revision: 1, // Migration creates revision 1 + }, + // Status is empty - simulating the case where creation succeeded but status update failed + } + + client. + On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). 
+ Run(func(args mock.Arguments) { + list := args.Get(1).(*ocv1.ClusterExtensionRevisionList) + list.Items = []ocv1.ClusterExtensionRevision{existingRev} + }). + Return(nil) + + client. + On("Get", mock.Anything, mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). + Run(func(args mock.Arguments) { + rev := args.Get(2).(*ocv1.ClusterExtensionRevision) + *rev = existingRev + }). + Return(nil) + + err := sm.Migrate(t.Context(), ext, map[string]string{"my-label": "my-value"}) + require.NoError(t, err) + + client.AssertExpectations(t) + + // Verify the status was set + statusWriter := client.Status().(*statusWriterMock) + require.True(t, statusWriter.updateCalled, "Status().Update() should be called to set missing status") + require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") + + rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) + require.True(t, ok, "Updated object should be a ClusterExtensionRevision") + + succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) + require.NotNil(t, succeededCond, "Succeeded condition should be set") + assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") + assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") + }) + + t.Run("updates status from False to True for migrated revision", func(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, ocv1.AddToScheme(testScheme)) + + brb := &mockBundleRevisionBuilder{} + mag := &mockActionGetter{} + client := &clientMock{} + sm := &applier.BoxcutterStorageMigrator{ + RevisionGenerator: brb, + ActionClientGetter: mag, + Client: client, + Scheme: testScheme, + } + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: "test123"}, + } + + // Migrated revision with Succeeded=False (e.g., from a previous failed status update attempt) + // This simulates a revision whose Succeeded condition should be corrected from False to True during migration. + existingRev := ocv1.ClusterExtensionRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-revision", + Generation: 2, + Labels: map[string]string{ + labels.MigratedFromHelmKey: "true", + }, + }, + Spec: ocv1.ClusterExtensionRevisionSpec{ + Revision: 1, + }, + Status: ocv1.ClusterExtensionRevisionStatus{ + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeSucceeded, + Status: metav1.ConditionFalse, // Important: False, not missing + Reason: "InProgress", + }, + }, + }, + } + + client. + On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). + Run(func(args mock.Arguments) { + list := args.Get(1).(*ocv1.ClusterExtensionRevisionList) + list.Items = []ocv1.ClusterExtensionRevision{existingRev} + }). + Return(nil) + + client. + On("Get", mock.Anything, mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). + Run(func(args mock.Arguments) { + rev := args.Get(2).(*ocv1.ClusterExtensionRevision) + *rev = existingRev + }). 
+ Return(nil) + + err := sm.Migrate(t.Context(), ext, map[string]string{"my-label": "my-value"}) + require.NoError(t, err) + + client.AssertExpectations(t) + + // Verify the status was updated from False to True + statusWriter := client.Status().(*statusWriterMock) + require.True(t, statusWriter.updateCalled, "Status().Update() should be called to update False to True") + require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") + + rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) + require.True(t, ok, "Updated object should be a ClusterExtensionRevision") + + succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) + require.NotNil(t, succeededCond, "Succeeded condition should be set") + assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be updated to True") + assert.Equal(t, ocv1.ReasonSucceeded, succeededCond.Reason, "Reason should be Succeeded") + }) + + t.Run("does not set status on non-migrated revision 1", func(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, ocv1.AddToScheme(testScheme)) + + brb := &mockBundleRevisionBuilder{} + mag := &mockActionGetter{} + client := &clientMock{} + sm := &applier.BoxcutterStorageMigrator{ + RevisionGenerator: brb, + ActionClientGetter: mag, + Client: client, + Scheme: testScheme, + } + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: "test123"}, + } + + // Revision 1 created by normal Boxcutter operation (no migration label) + // This simulates the first rollout - status should NOT be set as it may still be in progress + existingRev := ocv1.ClusterExtensionRevision{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-revision", + Generation: 2, + // No migration label - this is a normal Boxcutter revision + }, + Spec: ocv1.ClusterExtensionRevisionSpec{ + Revision: 1, + }, + } + client. On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). Run(func(args mock.Arguments) { list := args.Get(1).(*ocv1.ClusterExtensionRevisionList) - list.Items = []ocv1.ClusterExtensionRevision{ - {}, {}, // Existing revisions. + list.Items = []ocv1.ClusterExtensionRevision{existingRev} + }). + Return(nil) + + // The migration flow calls Get() to re-fetch the revision before checking its status. + // Even for non-migrated revisions, Get() is called to determine if status needs to be set. + client. + On("Get", mock.Anything, mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). + Run(func(args mock.Arguments) { + rev := args.Get(2).(*ocv1.ClusterExtensionRevision) + *rev = existingRev + }). 
+ Return(nil) + + err := sm.Migrate(t.Context(), ext, map[string]string{"my-label": "my-value"}) + require.NoError(t, err) + + client.AssertExpectations(t) + + // Verify the status was NOT set for non-migrated revision + statusWriter := client.Status().(*statusWriterMock) + require.False(t, statusWriter.updateCalled, "Status().Update() should NOT be called for non-migrated revisions") + }) + + t.Run("migrates from most recent deployed release when latest is failed", func(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, ocv1.AddToScheme(testScheme)) + + brb := &mockBundleRevisionBuilder{} + mag := &mockActionGetter{ + currentRel: &release.Release{ + Name: "test123", + Version: 3, + Info: &release.Info{Status: release.StatusFailed}, + }, + history: []*release.Release{ + { + Name: "test123", + Version: 3, + Info: &release.Info{Status: release.StatusFailed}, + }, + { + Name: "test123", + Version: 2, + Info: &release.Info{Status: release.StatusDeployed}, + }, + { + Name: "test123", + Version: 1, + Info: &release.Info{Status: release.StatusSuperseded}, + }, + }, + } + client := &clientMock{} + sm := &applier.BoxcutterStorageMigrator{ + RevisionGenerator: brb, + ActionClientGetter: mag, + Client: client, + Scheme: testScheme, + } + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: "test123"}, + } + + client. + On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). + Return(nil) + + client. + On("Create", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). + Once(). + Run(func(args mock.Arguments) { + // Verify the migration marker label is set before creation + rev := args.Get(1).(*ocv1.ClusterExtensionRevision) + require.Equal(t, "true", rev.Labels[labels.MigratedFromHelmKey], "Migration marker label should be set") + + // Simulate real Kubernetes behavior: Create() populates server-managed fields + rev.Generation = 1 + rev.ResourceVersion = "1" + }). + Return(nil) + + client. + On("Get", mock.Anything, mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevision"), mock.Anything). + Run(func(args mock.Arguments) { + rev := args.Get(2).(*ocv1.ClusterExtensionRevision) + rev.ObjectMeta.Name = "test-revision" + rev.ObjectMeta.Generation = 1 + rev.ObjectMeta.ResourceVersion = "1" + rev.Labels = map[string]string{ + labels.MigratedFromHelmKey: "true", } }). 
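Aside: the history fixture above (version 3 failed, version 2 deployed, version 1 superseded) exercises the selection rule used during migration. A standalone restatement of that rule, for illustration only; the function name is assumed:

package example

import "helm.sh/helm/v3/pkg/release"

// latestDeployed mirrors the migration selection rule: among the releases in the
// history, pick the highest-version release whose status is "deployed"; failed,
// superseded, and pending releases are ignored.
func latestDeployed(history []*release.Release) *release.Release {
	var best *release.Release
	for _, rel := range history {
		if rel == nil || rel.Info == nil || rel.Info.Status != release.StatusDeployed {
			continue
		}
		if best == nil || rel.Version > best.Version {
			best = rel
		}
	}
	return best
}

With the fixture above, latestDeployed returns version 2, which matches what the assertions later in this test verify against brb.helmReleaseUsed.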
Return(nil) @@ -1285,6 +1610,70 @@ func TestBoxcutterStorageMigrator(t *testing.T) { require.NoError(t, err) client.AssertExpectations(t) + + // Verify the correct release (version 2, deployed) was used instead of version 3 (failed) + require.NotNil(t, brb.helmReleaseUsed, "GenerateRevisionFromHelmRelease should have been called") + assert.Equal(t, 2, brb.helmReleaseUsed.Version, "Should use version 2 (deployed), not version 3 (failed)") + assert.Equal(t, release.StatusDeployed, brb.helmReleaseUsed.Info.Status, "Should use deployed release") + + // Verify the migrated revision has Succeeded=True status + statusWriter := client.Status().(*statusWriterMock) + require.True(t, statusWriter.updateCalled, "Status().Update() should be called during migration") + require.NotNil(t, statusWriter.updatedObj, "Updated object should not be nil") + + rev, ok := statusWriter.updatedObj.(*ocv1.ClusterExtensionRevision) + require.True(t, ok, "Updated object should be a ClusterExtensionRevision") + + succeededCond := apimeta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) + require.NotNil(t, succeededCond, "Succeeded condition should be set") + assert.Equal(t, metav1.ConditionTrue, succeededCond.Status, "Succeeded condition should be True") + }) + + t.Run("does not create revision when helm release is not deployed and no deployed history", func(t *testing.T) { + testScheme := runtime.NewScheme() + require.NoError(t, ocv1.AddToScheme(testScheme)) + + brb := &mockBundleRevisionBuilder{} + mag := &mockActionGetter{ + currentRel: &release.Release{ + Name: "test123", + Info: &release.Info{Status: release.StatusFailed}, + }, + history: []*release.Release{ + { + Name: "test123", + Version: 2, + Info: &release.Info{Status: release.StatusFailed}, + }, + { + Name: "test123", + Version: 1, + Info: &release.Info{Status: release.StatusFailed}, + }, + }, + } + client := &clientMock{} + sm := &applier.BoxcutterStorageMigrator{ + RevisionGenerator: brb, + ActionClientGetter: mag, + Client: client, + Scheme: testScheme, + } + + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: "test123"}, + } + + client. + On("List", mock.Anything, mock.AnythingOfType("*v1.ClusterExtensionRevisionList"), mock.Anything). + Return(nil) + + err := sm.Migrate(t.Context(), ext, map[string]string{"my-label": "my-value"}) + require.NoError(t, err) + + client.AssertExpectations(t) + // brb.GenerateRevisionFromHelmRelease should NOT have been called + require.False(t, brb.generateRevisionFromHelmReleaseCalled, "GenerateRevisionFromHelmRelease should NOT be called when no deployed release exists") }) t.Run("does not create revision when no helm release", func(t *testing.T) { @@ -1320,7 +1709,9 @@ func TestBoxcutterStorageMigrator(t *testing.T) { // mockBundleRevisionBuilder is a mock implementation of the ClusterExtensionRevisionGenerator for testing. 
type mockBundleRevisionBuilder struct { - makeRevisionFunc func(ctx context.Context, bundleFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotation map[string]string) (*ocv1.ClusterExtensionRevision, error) + makeRevisionFunc func(ctx context.Context, bundleFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotation map[string]string) (*ocv1.ClusterExtensionRevision, error) + generateRevisionFromHelmReleaseCalled bool + helmReleaseUsed *release.Release } func (m *mockBundleRevisionBuilder) GenerateRevision(ctx context.Context, bundleFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (*ocv1.ClusterExtensionRevision, error) { @@ -1332,6 +1723,8 @@ func (m *mockBundleRevisionBuilder) GenerateRevisionFromHelmRelease( helmRelease *release.Release, ext *ocv1.ClusterExtension, objectLabels map[string]string, ) (*ocv1.ClusterExtensionRevision, error) { + m.generateRevisionFromHelmReleaseCalled = true + m.helmReleaseUsed = helmRelease return &ocv1.ClusterExtensionRevision{ ObjectMeta: metav1.ObjectMeta{ Name: "test-revision", @@ -1345,6 +1738,7 @@ func (m *mockBundleRevisionBuilder) GenerateRevisionFromHelmRelease( type clientMock struct { mock.Mock + statusWriter *statusWriterMock } func (m *clientMock) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { @@ -1363,15 +1757,22 @@ func (m *clientMock) Create(ctx context.Context, obj client.Object, opts ...clie } func (m *clientMock) Status() client.StatusWriter { - return &statusWriterMock{mock: &m.Mock} + if m.statusWriter == nil { + m.statusWriter = &statusWriterMock{mock: &m.Mock} + } + return m.statusWriter } type statusWriterMock struct { - mock *mock.Mock + mock *mock.Mock + updatedObj client.Object + updateCalled bool } func (s *statusWriterMock) Update(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { - // Status updates are expected during migration - return success by default + // Capture the status update for test verification + s.updatedObj = obj + s.updateCalled = true return nil } diff --git a/internal/operator-controller/applier/helm_test.go b/internal/operator-controller/applier/helm_test.go index a1332812d..25fad4d66 100644 --- a/internal/operator-controller/applier/helm_test.go +++ b/internal/operator-controller/applier/helm_test.go @@ -98,6 +98,7 @@ func (mockHelmReleaseToObjectsConverter) GetObjectsFromRelease(*release.Release) type mockActionGetter struct { actionClientForErr error getClientErr error + historyErr error installErr error dryRunInstallErr error upgradeErr error @@ -105,6 +106,7 @@ type mockActionGetter struct { reconcileErr error desiredRel *release.Release currentRel *release.Release + history []*release.Release } func (mag *mockActionGetter) ActionClientFor(ctx context.Context, obj client.Object) (helmclient.ActionInterface, error) { @@ -116,7 +118,7 @@ func (mag *mockActionGetter) Get(name string, opts ...helmclient.GetOption) (*re } func (mag *mockActionGetter) History(name string, opts ...helmclient.HistoryOption) ([]*release.Release, error) { - return nil, mag.getClientErr + return mag.history, mag.historyErr } func (mag *mockActionGetter) Install(name, namespace string, chrt *chart.Chart, vals map[string]interface{}, opts ...helmclient.InstallOption) (*release.Release, error) { diff --git a/internal/operator-controller/applier/provider.go b/internal/operator-controller/applier/provider.go index cf75b28c8..ab4e728e7 100644 --- a/internal/operator-controller/applier/provider.go 
+++ b/internal/operator-controller/applier/provider.go @@ -16,6 +16,7 @@ import ( "github.com/operator-framework/operator-controller/internal/operator-controller/config" "github.com/operator-framework/operator-controller/internal/operator-controller/rukpak/bundle/source" "github.com/operator-framework/operator-controller/internal/operator-controller/rukpak/render" + errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error" ) // ManifestProvider returns the manifests that should be applied by OLM given a bundle and its associated ClusterExtension @@ -77,7 +78,7 @@ func (r *RegistryV1ManifestProvider) Get(bundleFS fs.FS, ext *ocv1.ClusterExtens bundleConfigBytes := extensionConfigBytes(ext) bundleConfig, err := config.UnmarshalConfig(bundleConfigBytes, schema, ext.Spec.Namespace) if err != nil { - return nil, fmt.Errorf("invalid ClusterExtension configuration: %w", err) + return nil, errorutil.NewTerminalError(ocv1.ReasonInvalidConfiguration, fmt.Errorf("invalid ClusterExtension configuration: %w", err)) } if watchNS := bundleConfig.GetWatchNamespace(); watchNS != nil { diff --git a/internal/operator-controller/applier/provider_test.go b/internal/operator-controller/applier/provider_test.go index 4138284a0..c551432e5 100644 --- a/internal/operator-controller/applier/provider_test.go +++ b/internal/operator-controller/applier/provider_test.go @@ -11,6 +11,7 @@ import ( apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/operator-framework/api/pkg/operators/v1alpha1" @@ -100,6 +101,37 @@ func Test_RegistryV1ManifestProvider_Integration(t *testing.T) { require.Contains(t, err.Error(), "invalid ClusterExtension configuration") }) + t.Run("returns terminal error for invalid config", func(t *testing.T) { + provider := applier.RegistryV1ManifestProvider{ + BundleRenderer: render.BundleRenderer{ + ResourceGenerators: []render.ResourceGenerator{ + func(rv1 *bundle.RegistryV1, opts render.Options) ([]client.Object, error) { + return nil, nil + }, + }, + }, + IsSingleOwnNamespaceEnabled: true, + } + + // Bundle with SingleNamespace install mode requiring watchNamespace config + bundleFS := bundlefs.Builder().WithPackageName("test"). 
+ WithCSV(clusterserviceversion.Builder().WithInstallModeSupportFor(v1alpha1.InstallModeTypeSingleNamespace).Build()).Build() + + // ClusterExtension without required config + ext := &ocv1.ClusterExtension{ + Spec: ocv1.ClusterExtensionSpec{ + Namespace: "install-namespace", + // No config provided - should fail validation + }, + } + + _, err := provider.Get(bundleFS, ext) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid ClusterExtension configuration") + // Assert that config validation errors are terminal (not retriable) + require.ErrorIs(t, err, reconcile.TerminalError(nil), "config validation errors should be terminal") + }) + t.Run("returns rendered manifests", func(t *testing.T) { provider := applier.RegistryV1ManifestProvider{ BundleRenderer: registryv1.Renderer, @@ -393,7 +425,7 @@ func Test_RegistryV1ManifestProvider_SingleOwnNamespaceSupport(t *testing.T) { }) require.Error(t, err) require.Contains(t, err.Error(), "invalid ClusterExtension configuration:") - require.Contains(t, err.Error(), "watchNamespace must be") + require.Contains(t, err.Error(), "must be") require.Contains(t, err.Error(), "install-namespace") }) diff --git a/internal/operator-controller/conditionsets/conditionsets.go b/internal/operator-controller/conditionsets/conditionsets.go index 0a741ed8d..97073a02d 100644 --- a/internal/operator-controller/conditionsets/conditionsets.go +++ b/internal/operator-controller/conditionsets/conditionsets.go @@ -40,6 +40,7 @@ var ConditionReasons = []string{ ocv1.ReasonDeprecationStatusUnknown, ocv1.ReasonFailed, ocv1.ReasonBlocked, + ocv1.ReasonInvalidConfiguration, ocv1.ReasonRetrying, ocv1.ReasonAbsent, ocv1.ReasonRollingOut, diff --git a/internal/operator-controller/config/config.go b/internal/operator-controller/config/config.go index 43f755762..30f30951c 100644 --- a/internal/operator-controller/config/config.go +++ b/internal/operator-controller/config/config.go @@ -31,6 +31,7 @@ import ( "strings" "github.com/santhosh-tekuri/jsonschema/v6" + "github.com/santhosh-tekuri/jsonschema/v6/kind" "sigs.k8s.io/yaml" ) @@ -208,7 +209,7 @@ func validateConfigWithSchema(configBytes []byte, schema map[string]any, install return fmt.Errorf("value must be a string") } if str != installNamespace { - return fmt.Errorf("invalid value %q: watchNamespace must be %q (the namespace where the operator is installed) because this operator only supports OwnNamespace install mode", str, installNamespace) + return fmt.Errorf("invalid value %q: must be %q (the namespace where the operator is installed) because this operator only supports OwnNamespace install mode", str, installNamespace) } return nil }, @@ -228,7 +229,7 @@ func validateConfigWithSchema(configBytes []byte, schema map[string]any, install return fmt.Errorf("value must be a string") } if str == installNamespace { - return fmt.Errorf("invalid value %q: watchNamespace must be different from %q (the install namespace) because this operator uses SingleNamespace install mode to watch a different namespace", str, installNamespace) + return fmt.Errorf("invalid value %q: must be different from %q (the install namespace) because this operator uses SingleNamespace install mode to watch a different namespace", str, installNamespace) } return nil }, @@ -294,9 +295,13 @@ func formatSchemaError(err error) error { // formatSingleError formats a single validation error from the schema library. 
func formatSingleError(errUnit jsonschema.OutputUnit) string { + if errUnit.Error == nil { + return "" + } + // Check the keyword location to identify the error type - switch { - case strings.Contains(errUnit.KeywordLocation, "/required"): + switch errKind := errUnit.Error.Kind.(type) { + case *kind.Required: // Missing required field fieldName := extractFieldNameFromMessage(errUnit.Error) if fieldName != "" { @@ -304,7 +309,7 @@ func formatSingleError(errUnit jsonschema.OutputUnit) string { } return "required field is missing" - case strings.Contains(errUnit.KeywordLocation, "/additionalProperties"): + case *kind.AdditionalProperties: // Unknown/additional field fieldName := extractFieldNameFromMessage(errUnit.Error) if fieldName != "" { @@ -312,7 +317,7 @@ func formatSingleError(errUnit jsonschema.OutputUnit) string { } return "unknown field" - case strings.Contains(errUnit.KeywordLocation, "/type"): + case *kind.Type: // Type mismatch (e.g., got null, want string) fieldPath := buildFieldPath(errUnit.InstanceLocation) if fieldPath != "" { @@ -324,16 +329,14 @@ func formatSingleError(errUnit jsonschema.OutputUnit) string { } return fmt.Sprintf("invalid type: %s", errUnit.Error.String()) - case strings.Contains(errUnit.KeywordLocation, "/format"): - // Custom format validation (e.g., OwnNamespace, SingleNamespace constraints) - // These already have good error messages from our custom validators - if errUnit.Error != nil { - return errUnit.Error.String() - } + case *kind.Format: fieldPath := buildFieldPath(errUnit.InstanceLocation) - return fmt.Sprintf("invalid format for field %q", fieldPath) + if fieldPath != "" { + return fmt.Sprintf("invalid format for field %q: %s", fieldPath, errUnit.Error.String()) + } + return fmt.Sprintf("invalid format: %s", errUnit.Error.String()) - case strings.Contains(errUnit.KeywordLocation, "/anyOf"): + case *kind.AnyOf: // anyOf validation failed - could be null or wrong type // This happens when a field accepts [null, string] but got something else fieldPath := buildFieldPath(errUnit.InstanceLocation) @@ -342,13 +345,31 @@ func formatSingleError(errUnit jsonschema.OutputUnit) string { } return "invalid value" + case *kind.MaxLength: + fieldPath := buildFieldPath(errUnit.InstanceLocation) + if fieldPath != "" { + return fmt.Sprintf("field %q must have maximum length of %d (len=%d)", fieldPath, errKind.Want, errKind.Got) + } + return errUnit.Error.String() + + case *kind.MinLength: + fieldPath := buildFieldPath(errUnit.InstanceLocation) + if fieldPath != "" { + return fmt.Sprintf("field %q must have minimum length of %d (len=%d)", fieldPath, errKind.Want, errKind.Got) + } + return errUnit.Error.String() + + case *kind.Pattern: + fieldPath := buildFieldPath(errUnit.InstanceLocation) + if fieldPath != "" { + return fmt.Sprintf("field %q must match pattern %q", fieldPath, errKind.Want) + } + return errUnit.Error.String() + default: - // Unknown error type - return the library's error message + // Unhandled error type - return the library's error message // This serves as a fallback for future schema features we haven't customized yet - if errUnit.Error != nil { - return errUnit.Error.String() - } - return "" + return errUnit.Error.String() } } diff --git a/internal/operator-controller/config/error_formatting_test.go b/internal/operator-controller/config/error_formatting_test.go index 557e21019..6ec1ebf7d 100644 --- a/internal/operator-controller/config/error_formatting_test.go +++ b/internal/operator-controller/config/error_formatting_test.go @@ -1,6 +1,7 @@ 
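Aside on the formatSingleError change above: switching on the library's typed error kinds is less brittle than substring-matching KeywordLocation, because a location such as "/properties/watchNamespace/anyOf/1/pattern" contains several keyword names at once. A minimal sketch of the typed-kind approach, using only the jsonschema/v6 fields already referenced above; formatPatternError is a hypothetical helper:

package example

import (
	"fmt"

	"github.com/santhosh-tekuri/jsonschema/v6"
	"github.com/santhosh-tekuri/jsonschema/v6/kind"
)

// formatPatternError demonstrates the typed-kind approach: the concrete kind value
// carries structured data (here the expected pattern in Want), so a precise message
// can be built without parsing the library's rendered error string.
func formatPatternError(errUnit jsonschema.OutputUnit) (string, bool) {
	if errUnit.Error == nil {
		return "", false
	}
	k, ok := errUnit.Error.Kind.(*kind.Pattern)
	if !ok {
		return "", false
	}
	return fmt.Sprintf("value at %q must match pattern %q", errUnit.InstanceLocation, k.Want), true
}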
package config_test import ( + "strings" "testing" "github.com/stretchr/testify/require" @@ -72,9 +73,9 @@ func Test_ErrorFormatting_SchemaLibraryVersion(t *testing.T) { supportedInstallModes: []v1alpha1.InstallModeType{v1alpha1.InstallModeTypeOwnNamespace}, installNamespace: "correct-namespace", expectedErrSubstrings: []string{ + "invalid format for field \"watchNamespace\"", "invalid value", "wrong-namespace", - "watchNamespace must be", "correct-namespace", "the namespace where the operator is installed", "OwnNamespace install mode", @@ -86,14 +87,38 @@ func Test_ErrorFormatting_SchemaLibraryVersion(t *testing.T) { supportedInstallModes: []v1alpha1.InstallModeType{v1alpha1.InstallModeTypeSingleNamespace}, installNamespace: "install-ns", expectedErrSubstrings: []string{ + "invalid format for field \"watchNamespace\"", + "not valid singleNamespaceInstallMode", "invalid value", "install-ns", - "watchNamespace must be different from", + "must be different from", "the install namespace", "SingleNamespace install mode", "watch a different namespace", }, }, + { + name: "SingleNamespace constraint error bad namespace format", + rawConfig: []byte(`{"watchNamespace": "---AAAA-BBBB-super-long-namespace-that-that-is-waaaaaaaaayyy-longer-than-sixty-three-characters"}`), + supportedInstallModes: []v1alpha1.InstallModeType{v1alpha1.InstallModeTypeSingleNamespace}, + installNamespace: "install-ns", + expectedErrSubstrings: []string{ + "field \"watchNamespace\"", + "must match pattern \"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$\"", + }, + }, + { + name: "Single- and OwnNamespace constraint error bad namespace format", + rawConfig: []byte(`{"watchNamespace": ` + strings.Repeat("A", 63) + `"}`), + supportedInstallModes: []v1alpha1.InstallModeType{v1alpha1.InstallModeTypeSingleNamespace, v1alpha1.InstallModeTypeOwnNamespace}, + installNamespace: "install-ns", + expectedErrSubstrings: []string{ + "invalid configuration", + "multiple errors found", + "must have maximum length of 63 (len=64)", + "must match pattern \"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$\"", + }, + }, } { t.Run(tc.name, func(t *testing.T) { rv1 := bundle.RegistryV1{ diff --git a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go index 5c746c7b0..dfb5dad14 100644 --- a/internal/operator-controller/controllers/boxcutter_reconcile_steps.go +++ b/internal/operator-controller/controllers/boxcutter_reconcile_steps.go @@ -19,6 +19,7 @@ package controllers import ( "cmp" "context" + "errors" "fmt" "io/fs" "slices" @@ -27,6 +28,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/labels" @@ -114,6 +116,12 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e // If there was an error applying the resolved bundle, // report the error via the Progressing condition. setStatusProgressing(ext, wrapErrorWithResolutionInfo(state.resolvedRevisionMetadata.BundleMetadata, err)) + // Only set Installed condition for retryable errors. + // For terminal errors (Progressing: False with a terminal reason such as Blocked or InvalidConfiguration), + // the Progressing condition already provides all necessary information about the failure. 
+ if !errors.Is(err, reconcile.TerminalError(nil)) { + setInstalledStatusFromRevisionStates(ext, state.revisionStates) + } return nil, err } diff --git a/internal/operator-controller/controllers/clusterextension_controller.go b/internal/operator-controller/controllers/clusterextension_controller.go index f089efcac..a3fcf48f3 100644 --- a/internal/operator-controller/controllers/clusterextension_controller.go +++ b/internal/operator-controller/controllers/clusterextension_controller.go @@ -50,6 +50,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/conditionsets" "github.com/operator-framework/operator-controller/internal/operator-controller/labels" + errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error" k8sutil "github.com/operator-framework/operator-controller/internal/shared/util/k8s" ) @@ -455,6 +456,19 @@ func (r *ClusterExtensionReconciler) SetupWithManager(mgr ctrl.Manager, opts ... } func wrapErrorWithResolutionInfo(resolved ocv1.BundleMetadata, err error) error { + // Preserve TerminalError type and reason through wrapping + if errors.Is(err, reconcile.TerminalError(nil)) { + // Extract the reason if one was provided + reason, hasReason := errorutil.ExtractTerminalReason(err) + // Unwrap to get the inner error, add context + innerErr := errorutil.UnwrapTerminal(err) + wrappedErr := fmt.Errorf("error for resolved bundle %q with version %q: %w", resolved.Name, resolved.Version, innerErr) + // Re-wrap preserving the reason if it existed + if hasReason { + return errorutil.NewTerminalError(reason, wrappedErr) + } + return reconcile.TerminalError(wrappedErr) + } return fmt.Errorf("error for resolved bundle %q with version %q: %w", resolved.Name, resolved.Version, err) } diff --git a/internal/operator-controller/controllers/common_controller.go b/internal/operator-controller/controllers/common_controller.go index 7fafc7bb7..cb6834c6b 100644 --- a/internal/operator-controller/controllers/common_controller.go +++ b/internal/operator-controller/controllers/common_controller.go @@ -25,6 +25,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ocv1 "github.com/operator-framework/operator-controller/api/v1" + errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error" ) const ( @@ -56,11 +57,8 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt // Nothing is installed if revisionStates.Installed == nil { setInstallStatus(ext, nil) - if len(revisionStates.RollingOut) == 0 { - setInstalledStatusConditionFalse(ext, ocv1.ReasonFailed, "No bundle installed") - } else { - setInstalledStatusConditionFalse(ext, ocv1.ReasonAbsent, "No bundle installed") - } + reason := determineFailureReason(revisionStates.RollingOut) + setInstalledStatusConditionFalse(ext, reason, "No bundle installed") return } // Something is installed @@ -71,6 +69,45 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image)) } +// determineFailureReason determines the appropriate reason for the Installed condition +// when no bundle is installed (Installed: False). 
+// +// Returns Failed when: +// - No rolling revisions exist (nothing to install) +// - The latest rolling revision has Reason: Retrying (indicates an error occurred) +// +// Returns Absent when: +// - Rolling revisions exist with the latest having Reason: RollingOut (healthy phased rollout in progress) +// +// Rationale: +// - Failed: Semantically indicates an error prevented installation +// - Absent: Semantically indicates "not there yet" (neutral state, e.g., during healthy rollout) +// - Retrying reason indicates an error (config validation, apply failure, etc.) +// - RollingOut reason indicates healthy progress (not an error) +// - Only the LATEST revision matters - old errors superseded by newer healthy revisions should not cause Failed +// +// Note: rollingRevisions are sorted in ascending order by Spec.Revision (oldest to newest), +// so the latest revision is the LAST element in the array. +func determineFailureReason(rollingRevisions []*RevisionMetadata) string { + if len(rollingRevisions) == 0 { + return ocv1.ReasonFailed + } + + // Check if the LATEST rolling revision indicates an error (Retrying reason) + // Latest revision is the last element in the array (sorted ascending by Spec.Revision) + latestRevision := rollingRevisions[len(rollingRevisions)-1] + progressingCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) { + // Retrying indicates an error occurred (config, apply, validation, etc.) + // Use Failed for semantic correctness: installation failed due to error + return ocv1.ReasonFailed + } + + // No error detected in latest revision - it's progressing healthily (RollingOut) or no conditions set + // Use Absent for neutral "not installed yet" state + return ocv1.ReasonAbsent +} + // setInstalledStatusConditionSuccess sets the installed status condition to success. func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) { SetStatusCondition(&ext.Status.Conditions, metav1.Condition{ @@ -119,12 +156,20 @@ func setStatusProgressing(ext *ocv1.ClusterExtension, err error) { if err != nil { progressingCond.Reason = ocv1.ReasonRetrying - progressingCond.Message = err.Error() + // Unwrap TerminalError to avoid "terminal error:" prefix in message + progressingCond.Message = errorutil.UnwrapTerminal(err).Error() } if errors.Is(err, reconcile.TerminalError(nil)) { progressingCond.Status = metav1.ConditionFalse - progressingCond.Reason = ocv1.ReasonBlocked + // Try to extract a specific reason from the terminal error. + // If the error was created with NewTerminalError(reason, err), use that reason. + // Otherwise, fall back to the generic "Blocked" reason. 
+ if reason, ok := errorutil.ExtractTerminalReason(err); ok { + progressingCond.Reason = reason + } else { + progressingCond.Reason = ocv1.ReasonBlocked + } } SetStatusCondition(&ext.Status.Conditions, progressingCond) diff --git a/internal/operator-controller/controllers/common_controller_test.go b/internal/operator-controller/controllers/common_controller_test.go index 93fad962e..3cc757543 100644 --- a/internal/operator-controller/controllers/common_controller_test.go +++ b/internal/operator-controller/controllers/common_controller_test.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ocv1 "github.com/operator-framework/operator-controller/api/v1" + errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error" ) func TestSetStatusProgressing(t *testing.T) { @@ -46,14 +47,25 @@ func TestSetStatusProgressing(t *testing.T) { }, }, { - name: "non-nil ClusterExtension, terminal error, Progressing condition has status False with reason Blocked", + name: "non-nil ClusterExtension, terminal error without reason, Progressing condition has status False with reason Blocked", err: reconcile.TerminalError(errors.New("boom")), clusterExtension: &ocv1.ClusterExtension{}, expected: metav1.Condition{ Type: ocv1.TypeProgressing, Status: metav1.ConditionFalse, Reason: ocv1.ReasonBlocked, - Message: "terminal error: boom", + Message: "boom", + }, + }, + { + name: "non-nil ClusterExtension, terminal error with InvalidConfiguration reason, Progressing condition has status False with that reason", + err: errorutil.NewTerminalError(ocv1.ReasonInvalidConfiguration, errors.New("missing required field")), + clusterExtension: &ocv1.ClusterExtension{}, + expected: metav1.Condition{ + Type: ocv1.TypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonInvalidConfiguration, + Message: "missing required field", }, }, } { @@ -240,3 +252,160 @@ func TestSetStatusConditionWrapper(t *testing.T) { }) } } + +func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T) { + tests := []struct { + name string + revisionStates *RevisionStates + expectedInstalledCond metav1.Condition + }{ + { + name: "no revisions at all - uses Failed", + revisionStates: &RevisionStates{ + Installed: nil, + RollingOut: nil, + }, + expectedInstalledCond: metav1.Condition{ + Type: ocv1.TypeInstalled, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonFailed, + }, + }, + { + name: "rolling revision with error (Retrying) - uses Failed", + revisionStates: &RevisionStates{ + Installed: nil, + RollingOut: []*RevisionMetadata{ + { + RevisionName: "rev-1", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRetrying, + Message: "some error occurred", + }, + }, + }, + }, + }, + expectedInstalledCond: metav1.Condition{ + Type: ocv1.TypeInstalled, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonFailed, + }, + }, + { + name: "multiple rolling revisions with one Retrying - uses Failed", + revisionStates: &RevisionStates{ + Installed: nil, + RollingOut: []*RevisionMetadata{ + { + RevisionName: "rev-1", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRollingOut, + Message: "Revision is rolling out", + }, + }, + }, + { + RevisionName: "rev-2", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: 
metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRetrying, + Message: "validation error occurred", + }, + }, + }, + }, + }, + expectedInstalledCond: metav1.Condition{ + Type: ocv1.TypeInstalled, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonFailed, + }, + }, + { + name: "rolling revision with RollingOut reason - uses Absent", + revisionStates: &RevisionStates{ + Installed: nil, + RollingOut: []*RevisionMetadata{ + { + RevisionName: "rev-1", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRollingOut, + Message: "Revision is rolling out", + }, + }, + }, + }, + }, + expectedInstalledCond: metav1.Condition{ + Type: ocv1.TypeInstalled, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonAbsent, + }, + }, + { + name: "old revision with Retrying superseded by latest healthy - uses Absent", + revisionStates: &RevisionStates{ + Installed: nil, + RollingOut: []*RevisionMetadata{ + { + RevisionName: "rev-1", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ClusterExtensionRevisionReasonRetrying, + Message: "old error that was superseded", + }, + }, + }, + { + RevisionName: "rev-2", + Conditions: []metav1.Condition{ + { + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionTrue, + Reason: ocv1.ReasonRollingOut, + Message: "Latest revision is rolling out healthy", + }, + }, + }, + }, + }, + expectedInstalledCond: metav1.Condition{ + Type: ocv1.TypeInstalled, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonAbsent, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ext := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-ext", + Generation: 1, + }, + } + + setInstalledStatusFromRevisionStates(ext, tt.revisionStates) + + cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, cond) + require.Equal(t, tt.expectedInstalledCond.Status, cond.Status) + require.Equal(t, tt.expectedInstalledCond.Reason, cond.Reason) + }) + } +} diff --git a/internal/operator-controller/labels/labels.go b/internal/operator-controller/labels/labels.go index 52fa6e2b1..16f45ecbb 100644 --- a/internal/operator-controller/labels/labels.go +++ b/internal/operator-controller/labels/labels.go @@ -39,4 +39,9 @@ const ( // so that the effective ServiceAccount identity used for // ClusterExtensionRevision operations is preserved. ServiceAccountNamespaceKey = "olm.operatorframework.io/service-account-namespace" + + // MigratedFromHelmKey is the label key used to mark ClusterExtensionRevisions + // that were created during migration from Helm releases. This label is used + // to distinguish migrated revisions from those created by normal Boxcutter operation. 
+ MigratedFromHelmKey = "olm.operatorframework.io/migrated-from-helm" ) diff --git a/internal/operator-controller/rukpak/bundle/registryv1.go b/internal/operator-controller/rukpak/bundle/registryv1.go index d8412fde6..5b0c2a8cd 100644 --- a/internal/operator-controller/rukpak/bundle/registryv1.go +++ b/internal/operator-controller/rukpak/bundle/registryv1.go @@ -15,8 +15,9 @@ import ( ) const ( - BundleConfigWatchNamespaceKey = "watchNamespace" - BundleConfigDeploymentConfigKey = "deploymentConfig" + watchNamespaceConfigKey = "watchNamespace" + namespaceNamePattern = "^[a-z0-9]([-a-z0-9]*[a-z0-9])?$" + namespaceNameMaxLength = 63 ) var ( @@ -69,19 +70,19 @@ func buildBundleConfigSchema(installModes sets.Set[v1alpha1.InstallMode]) (map[s if isWatchNamespaceConfigurable(installModes) { // Replace the generic watchNamespace with install-mode-specific version watchNSProperty, isRequired := buildWatchNamespaceProperty(installModes) - properties["watchNamespace"] = watchNSProperty + properties[watchNamespaceConfigKey] = watchNSProperty // Preserve existing required fields, only add/remove watchNamespace if isRequired { - addToRequired(baseSchema, "watchNamespace") + addToRequired(baseSchema, watchNamespaceConfigKey) } else { - removeFromRequired(baseSchema, "watchNamespace") + removeFromRequired(baseSchema, watchNamespaceConfigKey) } } else { // AllNamespaces only - remove watchNamespace property entirely // (operator always watches all namespaces, no configuration needed) - delete(properties, "watchNamespace") - removeFromRequired(baseSchema, "watchNamespace") + delete(properties, watchNamespaceConfigKey) + removeFromRequired(baseSchema, watchNamespaceConfigKey) } return baseSchema, nil @@ -138,37 +139,37 @@ func removeFromRequired(schema map[string]any, fieldName string) { // // Returns the validation rules and whether the field is required. 
func buildWatchNamespaceProperty(installModes sets.Set[v1alpha1.InstallMode]) (map[string]any, bool) { - watchNSProperty := map[string]any{ - "description": "The namespace that the operator should watch for custom resources", - } + const description = "The namespace that the operator should watch for custom resources" hasOwnNamespace := installModes.Has(v1alpha1.InstallMode{Type: v1alpha1.InstallModeTypeOwnNamespace, Supported: true}) hasSingleNamespace := installModes.Has(v1alpha1.InstallMode{Type: v1alpha1.InstallModeTypeSingleNamespace, Supported: true}) format := selectNamespaceFormat(hasOwnNamespace, hasSingleNamespace) - if isWatchNamespaceConfigRequired(installModes) { - watchNSProperty["type"] = "string" - if format != "" { - watchNSProperty["format"] = format - } - return watchNSProperty, true - } - - // allow null or valid namespace string - stringSchema := map[string]any{ - "type": "string", + watchNamespaceSchema := map[string]any{ + "type": "string", + "minLength": 1, + "maxLength": namespaceNameMaxLength, + // kubernetes namespace name format + "pattern": namespaceNamePattern, } if format != "" { - stringSchema["format"] = format + watchNamespaceSchema["format"] = format } - // Convert to []any for JSON schema compatibility - watchNSProperty["anyOf"] = []any{ - map[string]any{"type": "null"}, - stringSchema, + + if isWatchNamespaceConfigRequired(installModes) { + watchNamespaceSchema["description"] = description + return watchNamespaceSchema, true } - return watchNSProperty, false + // allow null or valid namespace string + return map[string]any{ + "description": description, + "anyOf": []any{ + map[string]any{"type": "null"}, + watchNamespaceSchema, + }, + }, false } // selectNamespaceFormat picks which namespace constraint to apply. diff --git a/internal/shared/util/error/terminal.go b/internal/shared/util/error/terminal.go index cd70d535f..5c9101207 100644 --- a/internal/shared/util/error/terminal.go +++ b/internal/shared/util/error/terminal.go @@ -1,6 +1,61 @@ package error -import "sigs.k8s.io/controller-runtime/pkg/reconcile" +import ( + "errors" + + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// terminalErrorWithReason is an internal error type that carries a Reason field +// to provide more granular categorization of terminal errors for status conditions. +type terminalErrorWithReason struct { + reason string + err error +} + +func (e *terminalErrorWithReason) Error() string { + return e.err.Error() +} + +func (e *terminalErrorWithReason) Unwrap() error { + return e.err +} + +// NewTerminalError creates a terminal error with a specific reason. +// The error is wrapped with reconcile.TerminalError so controller-runtime +// recognizes it as terminal, while preserving the reason for status reporting. +// +// Example usage: +// +// return error.NewTerminalError(ocv1.ReasonInvalidConfiguration, fmt.Errorf("missing required field")) +// +// The reason can be extracted later using ExtractTerminalReason() when setting +// status conditions to provide more specific feedback than just "Blocked". +func NewTerminalError(reason string, err error) error { + return reconcile.TerminalError(&terminalErrorWithReason{ + reason: reason, + err: err, + }) +} + +// ExtractTerminalReason extracts the reason from a terminal error created with +// NewTerminalError. Returns the reason and true if found, or empty string and +// false if the error was not created with NewTerminalError. 
diff --git a/internal/shared/util/error/terminal.go b/internal/shared/util/error/terminal.go
index cd70d535f..5c9101207 100644
--- a/internal/shared/util/error/terminal.go
+++ b/internal/shared/util/error/terminal.go
@@ -1,6 +1,61 @@
 package error
 
-import "sigs.k8s.io/controller-runtime/pkg/reconcile"
+import (
+    "errors"
+
+    "sigs.k8s.io/controller-runtime/pkg/reconcile"
+)
+
+// terminalErrorWithReason is an internal error type that carries a Reason field
+// to provide more granular categorization of terminal errors for status conditions.
+type terminalErrorWithReason struct {
+    reason string
+    err    error
+}
+
+func (e *terminalErrorWithReason) Error() string {
+    return e.err.Error()
+}
+
+func (e *terminalErrorWithReason) Unwrap() error {
+    return e.err
+}
+
+// NewTerminalError creates a terminal error with a specific reason.
+// The error is wrapped with reconcile.TerminalError so controller-runtime
+// recognizes it as terminal, while preserving the reason for status reporting.
+//
+// Example usage:
+//
+//	return error.NewTerminalError(ocv1.ReasonInvalidConfiguration, fmt.Errorf("missing required field"))
+//
+// The reason can be extracted later using ExtractTerminalReason() when setting
+// status conditions to provide more specific feedback than just "Blocked".
+func NewTerminalError(reason string, err error) error {
+    return reconcile.TerminalError(&terminalErrorWithReason{
+        reason: reason,
+        err:    err,
+    })
+}
+
+// ExtractTerminalReason extracts the reason from a terminal error created with
+// NewTerminalError. Returns the reason and true if found, or empty string and
+// false if the error was not created with NewTerminalError.
+//
+// This allows setStatusProgressing to use specific reasons like "InvalidConfiguration"
+// instead of the generic "Blocked" for all terminal errors.
+func ExtractTerminalReason(err error) (string, bool) {
+    if err == nil {
+        return "", false
+    }
+    // Unwrap the reconcile.TerminalError wrapper first
+    unwrapped := errors.Unwrap(err)
+    var terr *terminalErrorWithReason
+    if errors.As(unwrapped, &terr) {
+        return terr.reason, true
+    }
+    return "", false
+}
 
 func WrapTerminal(err error, isTerminal bool) error {
     if !isTerminal || err == nil {
@@ -8,3 +63,22 @@
     }
     return reconcile.TerminalError(err)
 }
+
+// UnwrapTerminal unwraps a TerminalError to get the underlying error without
+// the "terminal error:" prefix that reconcile.TerminalError adds to the message.
+// This is useful when displaying error messages in status conditions where the
+// terminal/blocked nature is already conveyed by the condition Reason field.
+//
+// If err is not a TerminalError, it returns err unchanged.
+// If err is nil, it returns nil.
+func UnwrapTerminal(err error) error {
+    if err == nil {
+        return nil
+    }
+    if errors.Is(err, reconcile.TerminalError(nil)) {
+        if unwrapped := errors.Unwrap(err); unwrapped != nil {
+            return unwrapped
+        }
+    }
+    return err
+}
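Note (not part of the diff): a rough sketch of how a reconciler and its status helper might use the new helpers together. The import path and alias, the helper names, and the plain reason strings are assumptions for illustration; the real code would use the ocv1.Reason* constants referenced in the doc comments above.

```go
package example

import (
	"fmt"

	// Assumed import path/alias for the package changed in this diff; illustrative only.
	errutil "github.com/operator-framework/operator-controller/internal/shared/util/error"
)

// validateConfig is a hypothetical validation step. Attaching a reason to the
// terminal error lets the status writer report "InvalidConfiguration" instead
// of a generic reason, while controller-runtime still stops requeueing.
func validateConfig(watchNamespaceSet bool) error {
	if !watchNamespaceSet {
		return errutil.NewTerminalError("InvalidConfiguration",
			fmt.Errorf("invalid configuration: required field %q is missing", "watchNamespace"))
	}
	return nil
}

// progressingReasonAndMessage sketches how a status helper could consume the
// error: prefer the reason attached via NewTerminalError, fall back to a
// generic retry reason, and strip the "terminal error:" prefix from the message.
func progressingReasonAndMessage(err error) (string, string) {
	if err == nil {
		return "", ""
	}
	reason := "Retrying"
	if r, ok := errutil.ExtractTerminalReason(err); ok {
		reason = r
	}
	return reason, errutil.UnwrapTerminal(err).Error()
}
```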
diff --git a/test/e2e/features/install.feature b/test/e2e/features/install.feature
index ab87b5f31..cc00b60d1 100644
--- a/test/e2e/features/install.feature
+++ b/test/e2e/features/install.feature
@@ -117,7 +117,7 @@ Feature: Install ClusterExtension
             matchLabels:
               "olm.operatorframework.io/metadata.name": test-catalog
       """
-    And ClusterExtension reports Progressing as True with Reason Retrying and Message:
+    And ClusterExtension reports Progressing as False with Reason InvalidConfiguration and Message:
       """
       error for resolved bundle "single-namespace-operator.1.0.0" with version
       "1.0.0": invalid ClusterExtension configuration: invalid configuration: required field "watchNamespace" is missing
@@ -169,7 +169,7 @@ Feature: Install ClusterExtension
             matchLabels:
               "olm.operatorframework.io/metadata.name": test-catalog
       """
-    And ClusterExtension reports Progressing as True with Reason Retrying and Message:
+    And ClusterExtension reports Progressing as False with Reason InvalidConfiguration and Message:
       """
       error for resolved bundle "own-namespace-operator.1.0.0" with version
       "1.0.0": invalid ClusterExtension configuration: invalid configuration: required
@@ -197,13 +197,13 @@ Feature: Install ClusterExtension
             matchLabels:
               "olm.operatorframework.io/metadata.name": test-catalog
       """
-    And ClusterExtension reports Progressing as True with Reason Retrying and Message:
+    And ClusterExtension reports Progressing as False with Reason InvalidConfiguration and Message:
       """
       error for resolved bundle "own-namespace-operator.1.0.0" with version
-      "1.0.0": invalid ClusterExtension configuration: invalid configuration: 'some-ns'
-      is not valid ownNamespaceInstallMode: invalid value "some-ns": watchNamespace
-      must be "${TEST_NAMESPACE}" (the namespace where the operator is installed) because this
-      operator only supports OwnNamespace install mode
+      "1.0.0": invalid ClusterExtension configuration: invalid configuration: invalid
+      format for field "watchNamespace": 'some-ns' is not valid ownNamespaceInstallMode:
+      invalid value "some-ns": must be "${TEST_NAMESPACE}" (the namespace where the
+      operator is installed) because this operator only supports OwnNamespace install mode
       """
     When ClusterExtension is updated to set watchNamespace to own namespace value
       """
@@ -298,6 +298,67 @@ Feature: Install ClusterExtension
         mutate: true
       """
 
+  @SingleOwnNamespaceInstallSupport
+  Scenario: Report failure when watchNamespace has invalid DNS-1123 name
+    Given ServiceAccount "olm-admin" in test namespace is cluster admin
+    When ClusterExtension is applied
+      """
+      apiVersion: olm.operatorframework.io/v1
+      kind: ClusterExtension
+      metadata:
+        name: ${NAME}
+      spec:
+        namespace: ${TEST_NAMESPACE}
+        serviceAccount:
+          name: olm-admin
+        config:
+          configType: Inline
+          inline:
+            watchNamespace: invalid-namespace-
+        source:
+          sourceType: Catalog
+          catalog:
+            packageName: single-namespace-operator
+            selector:
+              matchLabels:
+                "olm.operatorframework.io/metadata.name": test-catalog
+      """
+    Then ClusterExtension reports Progressing as True with Reason Retrying and Message includes:
+      """
+      invalid ClusterExtension configuration: invalid configuration: field "watchNamespace" must match pattern
+      """
+
+  @SingleOwnNamespaceInstallSupport
+  @WebhookProviderCertManager
+  Scenario: Reject watchNamespace for operator that does not support Single/OwnNamespace install modes
+    Given ServiceAccount "olm-admin" in test namespace is cluster admin
+    When ClusterExtension is applied
+      """
+      apiVersion: olm.operatorframework.io/v1
+      kind: ClusterExtension
+      metadata:
+        name: ${NAME}
+      spec:
+        namespace: ${TEST_NAMESPACE}
+        serviceAccount:
+          name: olm-admin
+        config:
+          configType: Inline
+          inline:
+            watchNamespace: ${TEST_NAMESPACE}
+        source:
+          sourceType: Catalog
+          catalog:
+            packageName: webhook-operator
+            selector:
+              matchLabels:
+                "olm.operatorframework.io/metadata.name": test-catalog
+      """
+    Then ClusterExtension reports Progressing as True with Reason Retrying and Message includes:
+      """
+      invalid ClusterExtension configuration: invalid configuration: unknown field "watchNamespace"
+      """
+
   @BoxcutterRuntime
   @ProgressDeadline
   Scenario: Report ClusterExtension as not progressing if the rollout does not complete within given timeout
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 62e31409f..633ed2839 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -544,7 +544,7 @@ github.com/joelanford/ignore
 # github.com/json-iterator/go v1.1.12
 ## explicit; go 1.12
 github.com/json-iterator/go
-# github.com/klauspost/compress v1.18.2
+# github.com/klauspost/compress v1.18.3
 ## explicit; go 1.23
 github.com/klauspost/compress
 github.com/klauspost/compress/flate