From e72607c1057b13542faeee5e06e5bf52630dbb70 Mon Sep 17 00:00:00 2001 From: Adrian Riobo Date: Fri, 13 Dec 2024 16:10:12 +0100 Subject: [PATCH] feat: refactor mac request / release flow This commit move the time consuming effort for having fresh machines on the mac pool from the request action to the release. Now release will manage the root volume replace task and request will create new user credentials and set machine as locked to avoid new workloads to make use of it Signed-off-by: Adrian Riobo --- cmd/mapt/cmd/aws/hosts/mac.go | 12 +-- docs/aws/mac.drawio | 87 +++---------------- docs/aws/mac.svg | 2 +- pkg/provider/aws/action/mac/constants.go | 5 +- pkg/provider/aws/action/mac/mac-machine.go | 47 ++++------ pkg/provider/aws/action/mac/mac.go | 34 ++++---- .../aws/services/ec2/compute/compute.go | 34 +++++++- 7 files changed, 81 insertions(+), 140 deletions(-) diff --git a/cmd/mapt/cmd/aws/hosts/mac.go b/cmd/mapt/cmd/aws/hosts/mac.go index 2dd0ca14b..abe07b6a0 100644 --- a/cmd/mapt/cmd/aws/hosts/mac.go +++ b/cmd/mapt/cmd/aws/hosts/mac.go @@ -23,10 +23,10 @@ const ( dhIDDesc string = "id for the dedicated host" arch string = "arch" archDesc string = "mac architecture allowed values x86, m1, m2" - archDefault string = "m2" + archDefault string = mac.DefaultArch osVersion string = "version" osVersionDesc string = "macos operating system vestion 11, 12 on x86 and m1/m2; 13, 14 on all archs" - osDefault string = "14" + osDefault string = mac.DefaultOSVersion fixedLocation string = "fixed-location" fixedLocationDesc string = "if this flag is set the host will be created only on the region set by the AWS Env (AWS_DEFAULT_REGION)" ) @@ -96,14 +96,6 @@ func getMacRequest() *cobra.Command { flagSet.Bool(airgap, false, airgapDesc) flagSet.AddFlagSet(params.GetGHActionsFlagset()) c.PersistentFlags().AddFlagSet(flagSet) - err := c.MarkPersistentFlagRequired(arch) - if err != nil { - logging.Error(err) - } - err = c.MarkPersistentFlagRequired(osVersion) - if err != nil { - logging.Error(err) - } return c } diff --git a/docs/aws/mac.drawio b/docs/aws/mac.drawio index b57161131..b073a2662 100644 --- a/docs/aws/mac.drawio +++ b/docs/aws/mac.drawio @@ -1,6 +1,6 @@ - + - + @@ -14,7 +14,7 @@ - + @@ -22,7 +22,7 @@ - + @@ -32,7 +32,7 @@ - + @@ -60,7 +60,7 @@ - + @@ -72,58 +72,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -136,13 +84,13 @@ - + - + - + @@ -151,7 +99,7 @@ - + @@ -160,19 +108,10 @@ - - - - - - - - - - + - + @@ -181,7 +120,7 @@ - + diff --git a/docs/aws/mac.svg b/docs/aws/mac.svg index a1a65ec26..5dd116b68 100644 --- a/docs/aws/mac.svg +++ b/docs/aws/mac.svg @@ -1,4 +1,4 @@ -
mapt
mapt
aws mac request 
aws mac request 
Region
Region
Availability Zone
Availability Zone
Create will check if dedicated host exists on the pool and if it not locked. If no locked will create a machine and locked it. Otherwise it will return error
Create will check if dedicated host exis...
Region
Region
Availability Zone
Availability Zone
VPC
VPC
Public subnet
Public subnet
Create a mac machine with an OS version (default 14) it has internet connectivity. It creates all infra around it (networking, security groups, keys...)
Create a mac machine with an OS version...
Scenario 1
Scenario 1
Scenario 2 (airgap)
Scenario 2 (airgap)
Region
Region
Availability Zone
Availability Zone
VPC
VPC
Airgap subnet
Airgap subnet
Public subnet
Public subnet
bastion
bastion
Create a mac machine with an OS version (default 14). Mac machine is sitting on an airgap network, although it is able to run an init phase (bootstrap) with connectivity. Once the creation is finished it can only be accessed through the bastion and it will not have external connectivity .It creates all infra around it (networking, security groups, keys, bastion...)
Create a mac machine with an OS version (default 14). Mac machine is sitting on...
mapt
mapt
aws mac destroy 
aws mac destroy 
After 24 hours the dedicated host can be destroyed. If mac machine exists and it is not locked it will remove all assets related to it. If no mac machine exists it will remove the dedicated host
After 24 hours the dedicated host can...
mapt
mapt
aws mac release
aws mac release
Release operation will remove the lock the machine identified , as new requests can make use of it
Release operation will remove the lock t...
Text is not SVG - cannot display
\ No newline at end of file +
mapt
aws mac request 
Region
Availability Zone
Create will check if dedicated host exists on the pool and if it not locked. If no locked will create a machine and locked it. Otherwise it will return error
Region
Availability Zone
VPC
Public subnet
There are non locked dedicated mac hosts on the pool. The request will create a new user access and set the dedicated mac host as locked
mapt
aws mac destroy 
After 24 hours the dedicated host can be destroyed. If mac machine exists and it is not locked it will remove all assets related to it. If no mac machine exists it will remove the dedicated host
mapt
aws mac release
Release operation will remove the lock the machine identified , Also will run the replace root volume task to add a new fresh machine to the pool to be consumed by requests
\ No newline at end of file diff --git a/pkg/provider/aws/action/mac/constants.go b/pkg/provider/aws/action/mac/constants.go index 0fecf6358..bd525c59c 100644 --- a/pkg/provider/aws/action/mac/constants.go +++ b/pkg/provider/aws/action/mac/constants.go @@ -7,6 +7,7 @@ const ( awsMacMachineID = "amm" customResourceTypeLock = "rh:qe:aws:mac:lock" + customResourceTypeKey = "rh:mapt:aws:mac:key" outputLock = "ammLock" outputHost = "ammHost" @@ -19,8 +20,8 @@ const ( outputRegion = "ammRegion" amiRegex = "amzn-ec2-macos-%s*" - archDefault = "m2" - osVersionDefault = "14" + DefaultArch = "m2" + DefaultOSVersion = "15" vncDefaultPort int = 5900 diskSize int = 100 diff --git a/pkg/provider/aws/action/mac/mac-machine.go b/pkg/provider/aws/action/mac/mac-machine.go index cf1443f7e..c93570686 100644 --- a/pkg/provider/aws/action/mac/mac-machine.go +++ b/pkg/provider/aws/action/mac/mac-machine.go @@ -83,38 +83,25 @@ func (r *MacRequest) replaceMachine(h *HostInformation) error { return err } logging.Debugf("Replacing root volume for AMI %s", *ami.Image.ImageId) - _, err = qEC2.ReplaceRootVolume( + if _, err = qEC2.ReplaceRootVolume( qEC2.ReplaceRootVolumeRequest{ Region: *h.Region, InstanceID: *h.Host.Instances[0].InstanceId, // Needto lookup for AMI + check if copy is required AMIID: *ami.Image.ImageId, - }) - if err != nil { - return err - } - r.lock = true - if err := r.manageMacMachine(h); err != nil { + Wait: true, + }); err != nil { return err } - // replace will run again the boostrap script to generate - // and set new keys to access the machine - r.replace = true + r.lock = false return r.manageMacMachine(h) } -// Release will set the lock as false -func (r *MacRequest) releaseLock(h *HostInformation) error { - r.lock = false - lockURN := fmt.Sprintf("urn:pulumi:%s::%s::%s::%s", - maptContext.StackNameByProject(stackMacMachine), - maptContext.ProjectName(), - customResourceTypeLock, - resourcesUtil.GetResourceName( - r.Prefix, awsMacMachineID, "mac-lock")) - - // rh:qe:aws:mac:lock main-amm-mac-lock - return r.manageMacMachineTargets(h, []string{lockURN}) +// Run the bootstrap script creating new access credentials for the user +func (r *MacRequest) replaceUserAccess(h *HostInformation) error { + r.replace = true + r.lock = true + return r.manageMacMachine(h) } // Release will set the lock as false @@ -236,14 +223,15 @@ func (r *MacRequest) deployerMachine(ctx *pulumi.Context) error { ctx.Export(fmt.Sprintf("%s-%s", r.Prefix, outputUserPassword), userPassword.Result) ctx.Export(fmt.Sprintf("%s-%s", r.Prefix, outputUserPrivateKey), ukp.PrivateKey.PrivateKeyPem) - // Create a lock on the machine - if err := machineLock(ctx, - resourcesUtil.GetResourceName( - r.Prefix, awsMacMachineID, "mac-lock"), r.lock); err != nil { + readiness, err := r.readiness(ctx, i, ukp.PrivateKey, bastion, []pulumi.Resource{bc}) + if err != nil { return err } ctx.Export(fmt.Sprintf("%s-%s", r.Prefix, outputLock), pulumi.Bool(r.lock)) - return r.readiness(ctx, i, ukp.PrivateKey, bastion, []pulumi.Resource{bc}) + return machineLock(ctx, + resourcesUtil.GetResourceName( + r.Prefix, awsMacMachineID, "mac-lock"), r.lock, + pulumi.DependsOn([]pulumi.Resource{readiness})) } // Write exported values in context to files o a selected target folder @@ -400,8 +388,8 @@ func (r *MacRequest) readiness(ctx *pulumi.Context, m *ec2.Instance, mk *tls.PrivateKey, b *bastion.Bastion, - dependecies []pulumi.Resource) error { - _, err := remote.NewCommand(ctx, + dependecies []pulumi.Resource) (*remote.Command, error) { + return remote.NewCommand(ctx, resourcesUtil.GetResourceName(r.Prefix, awsMacMachineID, "readiness-cmd"), &remote.CommandArgs{ Connection: remoteCommandArgs(m, mk, b), @@ -412,7 +400,6 @@ func (r *MacRequest) readiness(ctx *pulumi.Context, Create: remoteTimeout, Update: remoteTimeout}), pulumi.DependsOn(dependecies)) - return err } // helper function to set the connection args diff --git a/pkg/provider/aws/action/mac/mac.go b/pkg/provider/aws/action/mac/mac.go index b4248f0da..d22bf8384 100644 --- a/pkg/provider/aws/action/mac/mac.go +++ b/pkg/provider/aws/action/mac/mac.go @@ -10,11 +10,9 @@ import ( "github.com/redhat-developer/mapt/pkg/provider/aws/data" "github.com/redhat-developer/mapt/pkg/provider/aws/services/tag" "github.com/redhat-developer/mapt/pkg/util/logging" - - ec2Types "github.com/aws/aws-sdk-go-v2/service/ec2/types" ) -// Request could be interpreted as a general way to create / release +// Request could be interpreted as a general way to create / get a machine // // Some project will request a mac machine // based on tags it will check if there is any existing mac machine (based on labels + arch + MaxPoolSize) @@ -50,7 +48,7 @@ func Request(r *MacRequest) error { } return create(r, hi) } - err = r.replaceMachine(hi) + err = r.replaceUserAccess(hi) if err != nil { return err } @@ -78,13 +76,14 @@ func Release(prefix string, hostID string) error { maptContext.InitBase( *hi.ProjectName, *hi.BackedURL) + // Set a default request r := &MacRequest{ Prefix: prefix, - Architecture: archDefault, - Version: osVersionDefault, + Architecture: DefaultArch, + Version: DefaultOSVersion, } - return r.releaseLock(hi) + return r.replaceMachine(hi) } // Initial scenario consider 1 machine @@ -100,17 +99,6 @@ func Destroy(prefix, hostID string) error { maptContext.InitBase( *hi.ProjectName, *hi.BackedURL) - // Check if dh is available and it has no instance on it - // otherwise we can not release it - if hi.Host.State == ec2Types.AllocationStateAvailable && - len(host.Instances) == 0 { - return aws.DestroyStack(aws.DestroyStackRequest{ - Stackname: stackDedicatedHost, - // TODO check if needed to add region for backedURL - Region: *hi.Region, - BackedURL: *hi.BackedURL, - }) - } // Dedicated host is not on a valid state to be deleted // With same backedURL check if machine is locked machineLocked, err := isMachineLocked(prefix, hi) @@ -118,10 +106,18 @@ func Destroy(prefix, hostID string) error { return err } if !machineLocked { - return aws.DestroyStack(aws.DestroyStackRequest{ + if err := aws.DestroyStack(aws.DestroyStackRequest{ Stackname: stackMacMachine, Region: *hi.Region, BackedURL: *hi.BackedURL, + }); err != nil { + return err + } + return aws.DestroyStack(aws.DestroyStackRequest{ + Stackname: stackDedicatedHost, + // TODO check if needed to add region for backedURL + Region: *hi.Region, + BackedURL: *hi.BackedURL, }) } logging.Debug("nothing to be destroyed") diff --git a/pkg/provider/aws/services/ec2/compute/compute.go b/pkg/provider/aws/services/ec2/compute/compute.go index 39b0ae922..d7b82c023 100644 --- a/pkg/provider/aws/services/ec2/compute/compute.go +++ b/pkg/provider/aws/services/ec2/compute/compute.go @@ -2,33 +2,59 @@ package compute import ( "context" + "fmt" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/service/ec2" + "github.com/aws/aws-sdk-go-v2/service/ec2/types" ) type ReplaceRootVolumeRequest struct { Region string InstanceID string AMIID string + Wait bool } // This function will replace the root volume for a running ec2 instance // and will delete the replaced volume -func ReplaceRootVolume(r ReplaceRootVolumeRequest) (*ec2.CreateReplaceRootVolumeTaskOutput, error) { +// If wait flag is true on request the funcion will wait until the replace task succeed +// otherwise it will trigger the task and return the id to reference it +func ReplaceRootVolume(r ReplaceRootVolumeRequest) (*string, error) { + ctx := context.Background() cfg, err := config.LoadDefaultConfig( - context.Background(), + ctx, config.WithRegion(r.Region)) if err != nil { return nil, err } client := ec2.NewFromConfig(cfg) deleteReplacedRootVolume := true - return client.CreateReplaceRootVolumeTask( - context.Background(), + // rrvt, err := + rrvt, err := client.CreateReplaceRootVolumeTask( + ctx, &ec2.CreateReplaceRootVolumeTaskInput{ InstanceId: &r.InstanceID, DeleteReplacedRootVolume: &deleteReplacedRootVolume, ImageId: &r.AMIID, }) + if err != nil { + return rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId, err + } + taskState := rrvt.ReplaceRootVolumeTask.TaskState + for r.Wait && taskState != types.ReplaceRootVolumeTaskStateSucceeded { + drvt, err := client.DescribeReplaceRootVolumeTasks(ctx, &ec2.DescribeReplaceRootVolumeTasksInput{ + ReplaceRootVolumeTaskIds: []string{*rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId}, + }) + if err != nil { + return rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId, nil + } + if len(drvt.ReplaceRootVolumeTasks) == 0 { + return rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId, + fmt.Errorf("something wrong happened checkding the replace root volume task with id %s", + *rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId) + } + taskState = drvt.ReplaceRootVolumeTasks[0].TaskState + } + return rrvt.ReplaceRootVolumeTask.ReplaceRootVolumeTaskId, nil }