Skip to content

Commit

Permalink
fix: tolerate required delay between service account creation and con…
Browse files Browse the repository at this point in the history
…figuration

It was discovered through testing that service accounts created on GCP
need a duration of time between creation and being referenced, otherwise
a BadRequest error occurs. A delayed retry logic is introduced to ensure
the service account is available before making additional configuration
calls.
  • Loading branch information
renan-campos committed Sep 13, 2024
1 parent a39ce2e commit 351773d
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 11 deletions.
27 changes: 21 additions & 6 deletions cmd/ocm/gcp/gcp_client_shim.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"reflect"
"strings"
"time"

"cloud.google.com/go/iam/admin/apiv1/adminpb"
"github.com/googleapis/gax-go/v2/apierror"
Expand All @@ -17,6 +18,7 @@ import (
"google.golang.org/grpc/codes"

"github.com/openshift-online/ocm-cli/pkg/gcp"
"github.com/openshift-online/ocm-cli/pkg/utils"
)

type GcpClientWifConfigShim interface {
Expand Down Expand Up @@ -142,7 +144,7 @@ func (c *shim) CreateServiceAccounts(
log *log.Logger,
) error {
for _, serviceAccount := range c.wifConfig.Gcp().ServiceAccounts() {
if err := c.createServiceAccount(ctx, log, serviceAccount, c.wifConfig); err != nil {
if err := c.createServiceAccount(ctx, log, serviceAccount); err != nil {
return err
}
if err := c.createOrUpdateRoles(ctx, log, serviceAccount.Roles()); err != nil {
Expand Down Expand Up @@ -177,11 +179,10 @@ func (c *shim) createServiceAccount(
ctx context.Context,
log *log.Logger,
serviceAccount *cmv1.WifServiceAccount,
wifConfig *cmv1.WifConfig,
) error {
serviceAccountId := serviceAccount.ServiceAccountId()
serviceAccountName := wifConfig.DisplayName() + "-" + serviceAccountId
serviceAccountDesc := poolDescription + " for WIF config " + wifConfig.DisplayName()
serviceAccountName := c.wifConfig.DisplayName() + "-" + serviceAccountId
serviceAccountDesc := poolDescription + " for WIF config " + c.wifConfig.DisplayName()

request := &adminpb.CreateServiceAccountRequest{
Name: fmt.Sprintf("projects/%s", c.wifConfig.Gcp().ProjectId()),
Expand All @@ -191,7 +192,7 @@ func (c *shim) createServiceAccount(
Description: serviceAccountDesc,
},
}
_, err := c.gcpClient.CreateServiceAccount(ctx, request)
createdServiceAccount, err := c.gcpClient.CreateServiceAccount(ctx, request)
if err != nil {
pApiError, ok := err.(*apierror.APIError)
if ok {
Expand All @@ -204,7 +205,21 @@ func (c *shim) createServiceAccount(
return errors.Wrap(err, "Failed to create IAM service account")
}
log.Printf("IAM service account %s created", serviceAccountId)
return nil

// It was found that there is a window of time between when a service
// account creation call is made that the service account is not
// available in API calls. This call waits until the service account is
// available before contiuing.
getRequest := &adminpb.GetServiceAccountRequest{
Name: fmt.Sprintf("projects/%s/serviceAccounts/%s",
c.wifConfig.Gcp().ProjectId(),
createdServiceAccount.UniqueId,
),
}
return utils.DelayedRetry(func() error {
_, err := c.gcpClient.GetServiceAccount(ctx, getRequest)
return err
}, 10, 500*time.Millisecond)
}

func (c *shim) createOrUpdateRoles(ctx context.Context, log *log.Logger, roles []*cmv1.WifRole) error {
Expand Down
13 changes: 8 additions & 5 deletions pkg/gcp/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ import (
)

type GcpClient interface {
/*
ListServiceAccounts(ctx context.Context, project string, filter func(s string) bool) ([]string, error) //nolint:lll
DeleteRole(context.Context, *adminpb.DeleteRoleRequest) (*adminpb.Role, error)
ListRoles(context.Context, *adminpb.ListRolesRequest) (*adminpb.ListRolesResponse, error)
*/
AttachImpersonator(ctx context.Context, saId, projectId, impersonatorResourceId string) error
AttachWorkloadIdentityPool(ctx context.Context, sa *cmv1.WifServiceAccount, poolId, projectId string) error
CreateRole(context.Context, *adminpb.CreateRoleRequest) (*adminpb.Role, error)
Expand All @@ -32,6 +27,7 @@ type GcpClient interface {
DeleteWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.Operation, error) //nolint:lll
GetProjectIamPolicy(ctx context.Context, projectName string, request *cloudresourcemanager.GetIamPolicyRequest) (*cloudresourcemanager.Policy, error) //nolint:lll
GetRole(context.Context, *adminpb.GetRoleRequest) (*adminpb.Role, error)
GetServiceAccount(ctx context.Context, request *adminpb.GetServiceAccountRequest) (*adminpb.ServiceAccount, error)
GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) //nolint:lll
GetWorkloadIdentityProvider(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPoolProvider, error) //nolint:lll
ProjectNumberFromId(ctx context.Context, projectId string) (int64, error)
Expand Down Expand Up @@ -195,6 +191,13 @@ func (c *gcpClient) GetRole(ctx context.Context, request *adminpb.GetRoleRequest
return c.iamClient.GetRole(ctx, request)
}

func (c *gcpClient) GetServiceAccount(
ctx context.Context,
request *adminpb.GetServiceAccountRequest,
) (*adminpb.ServiceAccount, error) {
return c.iamClient.GetServiceAccount(ctx, request)
}

//nolint:lll
func (c *gcpClient) GetWorkloadIdentityPool(ctx context.Context, resource string) (*iamv1.WorkloadIdentityPool, error) {
return c.oldIamClient.Projects.Locations.WorkloadIdentityPools.Get(resource).Context(ctx).Do()
Expand Down
13 changes: 13 additions & 0 deletions pkg/utils/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/url"
"os"
"regexp"
"time"
)

// the following regex defines four different patterns:
Expand Down Expand Up @@ -90,3 +91,15 @@ func HasDuplicates(valSlice []string) (string, bool) {
}
return "", false
}

func DelayedRetry(f func() error, maxRetries int, delay time.Duration) error {
var err error
for i := 0; i < maxRetries; i++ {
err = f()
if err == nil {
return nil
}
time.Sleep(delay)
}
return fmt.Errorf("Reached max retries. Last error: %s", err.Error())
}

0 comments on commit 351773d

Please sign in to comment.