Skip to content

Commit 7d50df3

Browse files
authored
Improve error messages for invalid identities (#4848)
* If we get a 401, 403, or 404 getting a MI, give the error to the user - otherwise return an internal server error * Fix typo * Appease CI * PR feedback response - handle 429s * PR feedback response - rename helper function to align with others * PR feedback response - factor out some duplicated error type checking * Remove original helper function to check for 4xx errors - it's unused now
1 parent ac7aea3 commit 7d50df3

12 files changed

Lines changed: 273 additions & 56 deletions

File tree

pkg/cluster/clustermsi.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func EnsureClusterMsiCertificateWithParams(ctx context.Context, clusterDocID str
4949
secretName := dataplane.IdentifierForManagedIdentityCredentials(clusterDocID)
5050

5151
existingMsiCertificate, err := kvStore.GetSecret(ctx, secretName, "", nil)
52-
if err != nil && !azureerrors.IsNotFoundError(err) {
52+
if err != nil && !azureerrors.IsStatusNotFoundError(err) {
5353
return MsiCertificateRefreshResultUnchanged, err
5454
}
5555

pkg/cluster/delete.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (m *manager) deleteNic(ctx context.Context, nicName string) error {
4949

5050
// nic is already gone which typically happens on PLS / PE nics
5151
// as they are deleted in a different step
52-
if azureerrors.IsNotFoundError(err) {
52+
if azureerrors.IsStatusNotFoundError(err) {
5353
return nil
5454
}
5555
if err != nil {
@@ -382,7 +382,7 @@ func (m *manager) deleteClusterMsiCertificate(ctx context.Context) error {
382382

383383
secretName := dataplane.IdentifierForManagedIdentityCredentials(m.doc.ID)
384384

385-
if _, err := m.clusterMsiKeyVaultStore.DeleteSecret(ctx, secretName, nil); err != nil && !azureerrors.IsNotFoundError(err) {
385+
if _, err := m.clusterMsiKeyVaultStore.DeleteSecret(ctx, secretName, nil); err != nil && !azureerrors.IsStatusNotFoundError(err) {
386386
return err
387387
}
388388

@@ -425,7 +425,7 @@ func (m *manager) deleteFederatedCredentials(ctx context.Context) error {
425425
&armmsi.FederatedIdentityCredentialsClientListOptions{},
426426
)
427427
if err != nil {
428-
if azureerrors.IsNotFoundError(err) {
428+
if azureerrors.IsStatusNotFoundError(err) {
429429
m.log.Infof("federated identity credentials not found for %s: %v", identity.ResourceID, err.Error())
430430
} else {
431431
m.log.Errorf("failed to list federated identity credentials for %s: %v", identity.ResourceID, err.Error())
@@ -452,7 +452,7 @@ func (m *manager) deleteFederatedCredentials(ctx context.Context) error {
452452
&armmsi.FederatedIdentityCredentialsClientDeleteOptions{},
453453
)
454454
if err != nil {
455-
if azureerrors.IsNotFoundError(err) {
455+
if azureerrors.IsStatusNotFoundError(err) {
456456
m.log.Infof("federated identity credentials not found for %s: %v", identity.ResourceID, err.Error())
457457
} else {
458458
m.log.Errorf("failed to delete federated identity credentials for %s: %v", identity.ResourceID, err.Error())

pkg/cluster/platformworkloadidentities.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,10 @@ func (m *manager) platformWorkloadIdentityIDs(ctx context.Context) error {
4444

4545
identityDetails, err := m.userAssignedIdentities.Get(ctx, resourceId.ResourceGroupName, resourceId.Name, &armmsi.UserAssignedIdentitiesClientGetOptions{})
4646
if err != nil {
47-
if azureerrors.Is4xxError(err) {
48-
m.log.Error(err)
49-
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fmt.Sprintf(`.properties.platformWorkloadIdentityProfile.platformWorkloadIdentities["%s"]`, operatorName), fmt.Sprintf("platform workload identity '%s' is invalid", operatorName))
47+
if azureerrors.IsStatusUnauthorizedError(err) || azureerrors.IsStatusForbiddenError(err) || azureerrors.IsStatusNotFoundError(err) || azureerrors.IsRetryableError(err) {
48+
return api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fmt.Sprintf(`.properties.platformWorkloadIdentityProfile.platformWorkloadIdentities["%s"]`, operatorName), err.Error())
5049
} else {
51-
return fmt.Errorf("error occured when retrieving platform workload identity '%s' details: %w", operatorName, err)
50+
return fmt.Errorf("error occurred when retrieving platform workload identity '%s' details: %w", operatorName, err)
5251
}
5352
}
5453

pkg/cluster/platformworkloadidentities_test.go

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@ package cluster
66
import (
77
"context"
88
"fmt"
9+
"net/http"
910
"strings"
1011
"testing"
1112

1213
"github.com/sirupsen/logrus"
1314
"github.com/stretchr/testify/assert"
1415
"go.uber.org/mock/gomock"
1516

17+
azruntime "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
1618
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
1719

1820
"github.com/Azure/ARO-RP/pkg/api"
@@ -37,6 +39,11 @@ func TestPlatformWorkloadIdentityIDs(t *testing.T) {
3739
identityBarResourceId := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.ManagedIdentity/userAssignedIdentities/%s", subscriptionId, clusterRG, identityBarName)
3840
identityBarClientId := "0ba40ba4-0ba4-0ba4-0ba4-0ba40ba40ba4"
3941
identityBarObjectId := "1ba41ba4-1ba4-1ba4-1ba4-1ba41ba41ba4"
42+
fooTarget := `.properties.platformWorkloadIdentityProfile.platformWorkloadIdentities["foo"]`
43+
unauthorizedErr := azruntime.NewResponseError(&http.Response{StatusCode: http.StatusUnauthorized})
44+
forbiddenErr := azruntime.NewResponseError(&http.Response{StatusCode: http.StatusForbidden})
45+
notFoundErr := azruntime.NewResponseError(&http.Response{StatusCode: http.StatusNotFound})
46+
tooManyRequestsErr := azruntime.NewResponseError(&http.Response{StatusCode: http.StatusTooManyRequests})
4047

4148
validWIClusterDoc := &api.OpenShiftClusterDocument{
4249
ID: clusterId,
@@ -121,7 +128,99 @@ func TestPlatformWorkloadIdentityIDs(t *testing.T) {
121128
mock.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
122129
Return(armmsi.UserAssignedIdentitiesClientGetResponse{}, fmt.Errorf("some error occurred"))
123130
},
124-
wantErr: "error occured when retrieving platform workload identity 'foo' details: some error occurred",
131+
wantErr: "error occurred when retrieving platform workload identity 'foo' details: some error occurred",
132+
},
133+
{
134+
name: "error - unauthorized identity lookup becomes invalid platform workload identity",
135+
doc: &api.OpenShiftClusterDocument{
136+
ID: clusterId,
137+
Key: clusterId,
138+
OpenShiftCluster: &api.OpenShiftCluster{
139+
Properties: api.OpenShiftClusterProperties{
140+
PlatformWorkloadIdentityProfile: &api.PlatformWorkloadIdentityProfile{
141+
PlatformWorkloadIdentities: map[string]api.PlatformWorkloadIdentity{
142+
identityFooName: {
143+
ResourceID: identityFooResourceId,
144+
},
145+
},
146+
},
147+
},
148+
},
149+
},
150+
userAssignedIdentitiesClientMocks: func(mock *mock_armmsi.MockUserAssignedIdentitiesClient) {
151+
mock.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
152+
Return(armmsi.UserAssignedIdentitiesClientGetResponse{}, unauthorizedErr)
153+
},
154+
wantErr: api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fooTarget, unauthorizedErr.Error()).Error(),
155+
},
156+
{
157+
name: "error - forbidden identity lookup becomes invalid platform workload identity",
158+
doc: &api.OpenShiftClusterDocument{
159+
ID: clusterId,
160+
Key: clusterId,
161+
OpenShiftCluster: &api.OpenShiftCluster{
162+
Properties: api.OpenShiftClusterProperties{
163+
PlatformWorkloadIdentityProfile: &api.PlatformWorkloadIdentityProfile{
164+
PlatformWorkloadIdentities: map[string]api.PlatformWorkloadIdentity{
165+
identityFooName: {
166+
ResourceID: identityFooResourceId,
167+
},
168+
},
169+
},
170+
},
171+
},
172+
},
173+
userAssignedIdentitiesClientMocks: func(mock *mock_armmsi.MockUserAssignedIdentitiesClient) {
174+
mock.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
175+
Return(armmsi.UserAssignedIdentitiesClientGetResponse{}, forbiddenErr)
176+
},
177+
wantErr: api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fooTarget, forbiddenErr.Error()).Error(),
178+
},
179+
{
180+
name: "error - not found identity lookup becomes invalid platform workload identity",
181+
doc: &api.OpenShiftClusterDocument{
182+
ID: clusterId,
183+
Key: clusterId,
184+
OpenShiftCluster: &api.OpenShiftCluster{
185+
Properties: api.OpenShiftClusterProperties{
186+
PlatformWorkloadIdentityProfile: &api.PlatformWorkloadIdentityProfile{
187+
PlatformWorkloadIdentities: map[string]api.PlatformWorkloadIdentity{
188+
identityFooName: {
189+
ResourceID: identityFooResourceId,
190+
},
191+
},
192+
},
193+
},
194+
},
195+
},
196+
userAssignedIdentitiesClientMocks: func(mock *mock_armmsi.MockUserAssignedIdentitiesClient) {
197+
mock.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
198+
Return(armmsi.UserAssignedIdentitiesClientGetResponse{}, notFoundErr)
199+
},
200+
wantErr: api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fooTarget, notFoundErr.Error()).Error(),
201+
},
202+
{
203+
name: "error - too many requests identity lookup becomes invalid platform workload identity",
204+
doc: &api.OpenShiftClusterDocument{
205+
ID: clusterId,
206+
Key: clusterId,
207+
OpenShiftCluster: &api.OpenShiftCluster{
208+
Properties: api.OpenShiftClusterProperties{
209+
PlatformWorkloadIdentityProfile: &api.PlatformWorkloadIdentityProfile{
210+
PlatformWorkloadIdentities: map[string]api.PlatformWorkloadIdentity{
211+
identityFooName: {
212+
ResourceID: identityFooResourceId,
213+
},
214+
},
215+
},
216+
},
217+
},
218+
},
219+
userAssignedIdentitiesClientMocks: func(mock *mock_armmsi.MockUserAssignedIdentitiesClient) {
220+
mock.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
221+
Return(armmsi.UserAssignedIdentitiesClientGetResponse{}, tooManyRequestsErr)
222+
},
223+
wantErr: api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidPlatformWorkloadIdentity, fooTarget, tooManyRequestsErr.Error()).Error(),
125224
},
126225
{
127226
name: "success - all clientIDs and objectIDs updated in clusterdoc",

pkg/operator/controllers/storageaccounts/storageaccounts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func (r *reconcileManager) reconcileAccounts(ctx context.Context) error {
4444

4545
armSubnet, err := r.subnets.Get(ctx, resourceGroupName, vnetName, subnetName, nil)
4646
if err != nil {
47-
if azureerrors.IsNotFoundError(err) {
47+
if azureerrors.IsStatusNotFoundError(err) {
4848
r.log.Infof("Subnet %s not found, skipping", subnet.ResourceID)
4949
continue
5050
}

pkg/operator/controllers/subnets/subnet_nsg.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func (r *reconcileManager) ensureSubnetNSG(ctx context.Context, s subnet.Subnet)
4242

4343
subnetObject, err := r.subnets.Get(ctx, subnetID.ResourceGroupName, subnetID.Parent.Name, subnetID.Name, nil)
4444
if err != nil {
45-
if azureerrors.IsNotFoundError(err) {
45+
if azureerrors.IsStatusNotFoundError(err) {
4646
r.log.Infof("Subnet %s not found, skipping", s.ResourceID)
4747
return nil
4848
}

pkg/operator/controllers/subnets/subnet_serviceendpoint.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func (r *reconcileManager) ensureSubnetServiceEndpoints(ctx context.Context, s s
2828

2929
subnetObject, err := r.subnets.Get(ctx, subnetID.ResourceGroupName, subnetID.Parent.Name, subnetID.Name, nil)
3030
if err != nil {
31-
if azureerrors.IsNotFoundError(err) {
31+
if azureerrors.IsStatusNotFoundError(err) {
3232
r.log.Infof("Subnet %s not found, skipping. err: %v", s.ResourceID, err)
3333
return nil
3434
}

pkg/util/acrtoken/acrtoken.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ func (m *manager) generateTokenPassword(ctx context.Context, passwordName sdkarm
209209
func (m *manager) Delete(ctx context.Context, registryProfile *api.RegistryProfile) error {
210210
err := m.tokens.DeleteAndWait(ctx, m.r.ResourceGroup, m.r.ResourceName, registryProfile.Username)
211211
// Ignore not-founds on delete
212-
if err != nil && azureerrors.IsNotFoundError(err) {
212+
if err != nil && azureerrors.IsStatusNotFoundError(err) {
213213
return nil
214214
}
215215
return err

pkg/util/azureerrors/error.go

Lines changed: 15 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -144,32 +144,34 @@ func IsDeploymentActiveError(err error) bool {
144144
return false
145145
}
146146

147-
func IsNotFoundError(err error) bool {
147+
func isStatusCodeError(err error, statusCode int) bool {
148148
var detailedErr autorest.DetailedError
149149
if errors.As(err, &detailedErr) {
150-
return detailedErr.StatusCode == http.StatusNotFound
150+
return detailedErr.StatusCode == statusCode
151151
}
152152

153153
var responseError *azcore.ResponseError
154154
if errors.As(err, &responseError) {
155-
return responseError.StatusCode == http.StatusNotFound
155+
return responseError.StatusCode == statusCode
156156
}
157157

158158
return false
159159
}
160160

161+
func IsStatusNotFoundError(err error) bool {
162+
return isStatusCodeError(err, http.StatusNotFound)
163+
}
164+
161165
func IsStatusConflictError(err error) bool {
162-
var detailedErr autorest.DetailedError
163-
if errors.As(err, &detailedErr) {
164-
return detailedErr.StatusCode == http.StatusConflict
165-
}
166+
return isStatusCodeError(err, http.StatusConflict)
167+
}
166168

167-
var responseError *azcore.ResponseError
168-
if errors.As(err, &responseError) {
169-
return responseError.StatusCode == http.StatusConflict
170-
}
169+
func IsStatusUnauthorizedError(err error) bool {
170+
return isStatusCodeError(err, http.StatusUnauthorized)
171+
}
171172

172-
return false
173+
func IsStatusForbiddenError(err error) bool {
174+
return isStatusCodeError(err, http.StatusForbidden)
173175
}
174176

175177
// IsInvalidSecretError returns if errors is InvalidCredentials error
@@ -224,15 +226,6 @@ func ResourceGroupNotFound(err error) bool {
224226
return false
225227
}
226228

227-
func Is4xxError(err error) bool {
228-
var responseError *azcore.ResponseError
229-
if errors.As(err, &responseError) {
230-
return responseError.StatusCode >= 400 && responseError.StatusCode < 500
231-
}
232-
233-
return false
234-
}
235-
236229
// IsVMSKUError checks if the error is a VM SKU availability error and returns
237230
// which profile (master/worker) is affected.
238231
// Azure Resource Manager error codes: https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-sku-not-available
@@ -375,20 +368,6 @@ func IsRetryableError(err error) bool {
375368
return false
376369
}
377370

378-
var responseError *azcore.ResponseError
379-
if errors.As(err, &responseError) {
380-
if responseError.StatusCode == http.StatusTooManyRequests {
381-
return true
382-
}
383-
}
384-
385-
var detailedErr autorest.DetailedError
386-
if errors.As(err, &detailedErr) {
387-
if detailedErr.StatusCode == http.StatusTooManyRequests {
388-
return true
389-
}
390-
}
391-
392371
// Check for RetryableError in error message (nested Azure errors)
393-
return strings.Contains(err.Error(), "RetryableError")
372+
return isStatusCodeError(err, http.StatusTooManyRequests) || strings.Contains(err.Error(), "RetryableError")
394373
}

0 commit comments

Comments
 (0)