Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add instance name to DiscoverEC2 User Task failed instances list #47712

Merged
merged 2 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/srv/discovery/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -990,10 +990,10 @@ func (s *Server) handleEC2RemoteInstallation(instances *server.EC2Instances) err
installerScript: req.InstallerScriptName(),
},
&usertasksv1.DiscoverEC2Instance{
// TODO(marco): add instance name
DiscoveryConfig: instances.DiscoveryConfig,
DiscoveryGroup: s.DiscoveryGroup,
InstanceId: instance.InstanceID,
Name: instance.InstanceName,
SyncTime: timestamppb.New(s.clock.Now()),
},
)
Expand Down
2 changes: 1 addition & 1 deletion lib/srv/discovery/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,12 +310,12 @@ func (s *Server) ReportEC2SSMInstallationResult(ctx context.Context, result *ser
installerScript: result.InstallerScript,
},
&usertasksv1.DiscoverEC2Instance{
// TODO(marco): add instance name
InvocationUrl: result.SSMRunEvent.InvocationURL,
DiscoveryConfig: result.DiscoveryConfig,
DiscoveryGroup: s.DiscoveryGroup,
SyncTime: timestamppb.New(result.SSMRunEvent.Time),
InstanceId: result.SSMRunEvent.InstanceID,
Name: result.InstanceName,
},
)

Expand Down
4 changes: 4 additions & 0 deletions lib/srv/server/ec2_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type EC2Instances struct {
// discovered.
type EC2Instance struct {
InstanceID string
InstanceName string
Tags map[string]string
OriginalInstance ec2.Instance
}
Expand All @@ -92,6 +93,9 @@ func toEC2Instance(originalInst *ec2.Instance) EC2Instance {
for _, tag := range originalInst.Tags {
if key := aws.StringValue(tag.Key); key != "" {
inst.Tags[key] = aws.StringValue(tag.Value)
if key == "Name" {
inst.InstanceName = aws.StringValue(tag.Value)
}
}
}
return inst
Expand Down
82 changes: 78 additions & 4 deletions lib/srv/server/ec2_watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,16 @@ func TestEC2Watcher(t *testing.T) {

present := ec2.Instance{
InstanceId: aws.String("instance-present"),
Tags: []*ec2.Tag{{
Key: aws.String("teleport"),
Value: aws.String("yes"),
}},
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
{
Key: aws.String("Name"),
Value: aws.String("Present"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
Expand Down Expand Up @@ -360,3 +366,71 @@ func TestMakeEvents(t *testing.T) {
})
}
}

func TestToEC2Instances(t *testing.T) {
sampleInstance := &ec2.Instance{
InstanceId: aws.String("instance-001"),
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
{
Key: aws.String("Name"),
Value: aws.String("MyInstanceName"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
}

sampleInstanceWithoutName := &ec2.Instance{
InstanceId: aws.String("instance-001"),
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
}

for _, tt := range []struct {
name string
input []*ec2.Instance
expected []EC2Instance
}{
{
name: "with name",
input: []*ec2.Instance{sampleInstance},
expected: []EC2Instance{{
InstanceID: "instance-001",
Tags: map[string]string{
"Name": "MyInstanceName",
"teleport": "yes",
},
InstanceName: "MyInstanceName",
OriginalInstance: *sampleInstance,
}},
},
{
name: "without name",
input: []*ec2.Instance{sampleInstanceWithoutName},
expected: []EC2Instance{{
InstanceID: "instance-001",
Tags: map[string]string{
"teleport": "yes",
},
OriginalInstance: *sampleInstanceWithoutName,
}},
},
} {
t.Run(tt.name, func(t *testing.T) {
got := ToEC2Instances(tt.input)
require.Equal(t, tt.expected, got)
})
}
}
78 changes: 49 additions & 29 deletions lib/srv/server/ssm_install.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"errors"
"fmt"
"log/slog"
"maps"
"slices"
"strings"

"github.com/aws/aws-sdk-go/aws"
Expand Down Expand Up @@ -67,6 +69,9 @@ type SSMInstallationResult struct {
SSMDocumentName string
// InstallerScript is the Teleport Installer script name used to install Teleport into the instance.
InstallerScript string
// InstanceName is the Instance's name.
// Might be empty.
InstanceName string
}

// SSMInstaller handles running SSM commands that install Teleport on EC2 instances.
Expand Down Expand Up @@ -134,18 +139,18 @@ func NewSSMInstaller(cfg SSMInstallerConfig) (*SSMInstaller, error) {

// Run executes the SSM document and then blocks until the command has completed.
func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
ids := make([]string, 0, len(req.Instances))
instances := make(map[string]string, len(req.Instances))
for _, inst := range req.Instances {
ids = append(ids, inst.InstanceID)
instances[inst.InstanceID] = inst.InstanceName
}

params := make(map[string][]*string)
for k, v := range req.Params {
params[k] = []*string{aws.String(v)}
}

validInstances := ids
instancesState, err := si.describeSSMAgentState(ctx, req, ids)
validInstances := instances
instancesState, err := si.describeSSMAgentState(ctx, req, instances)
switch {
case trace.IsAccessDenied(err):
// describeSSMAgentState uses `ssm:DescribeInstanceInformation` to gather all the instances information.
Expand All @@ -169,9 +174,10 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
validInstances = instancesState.valid
}

validInstanceIDs := instanceIDsFrom(validInstances)
output, err := req.SSM.SendCommandWithContext(ctx, &ssm.SendCommandInput{
DocumentName: aws.String(req.DocumentName),
InstanceIds: aws.StringSlice(validInstances),
InstanceIds: aws.StringSlice(validInstanceIDs),
Parameters: params,
})
if err != nil {
Expand All @@ -190,7 +196,7 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
delete(params, ParamSSHDConfigPath)
output, err = req.SSM.SendCommandWithContext(ctx, &ssm.SendCommandInput{
DocumentName: aws.String(req.DocumentName),
InstanceIds: aws.StringSlice(validInstances),
InstanceIds: aws.StringSlice(validInstanceIDs),
Parameters: params,
})
if err != nil {
Expand All @@ -200,16 +206,17 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {

g, ctx := errgroup.WithContext(ctx)
g.SetLimit(10)
for _, inst := range validInstances {
inst := inst
for instanceID, instanceName := range validInstances {
instanceID := instanceID
instanceName := instanceName
g.Go(func() error {
return trace.Wrap(si.checkCommand(ctx, req, output.Command.CommandId, &inst))
return trace.Wrap(si.checkCommand(ctx, req, output.Command.CommandId, &instanceID, instanceName))
})
}
return trace.Wrap(g.Wait())
}

func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, status, issueType string) *SSMInstallationResult {
func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, instanceName, status, issueType string) *SSMInstallationResult {
return &SSMInstallationResult{
SSMRunEvent: &apievents.SSMRun{
Metadata: apievents.Metadata{
Expand All @@ -228,13 +235,14 @@ func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, status,
IssueType: issueType,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}
}

func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRunRequest, instanceIDsState *instanceIDsSSMState) error {
var errs []error
for _, instanceID := range instanceIDsState.missing {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.missing {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"EC2 Instance is not registered in SSM. Make sure that the instance has AmazonSSMManagedInstanceCore policy assigned.",
usertasks.AutoDiscoverEC2IssueSSMInstanceNotRegistered,
)
Expand All @@ -243,8 +251,8 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu
}
}

for _, instanceID := range instanceIDsState.connectionLost {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.connectionLost {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"SSM Agent in EC2 Instance is not connecting to SSM Service. Restart or reinstall the SSM service. See https://docs.aws.amazon.com/systems-manager/latest/userguide/ami-preinstalled-agent.html#verify-ssm-agent-status for more details.",
usertasks.AutoDiscoverEC2IssueSSMInstanceConnectionLost,
)
Expand All @@ -253,8 +261,8 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu
}
}

for _, instanceID := range instanceIDsState.unsupportedOS {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.unsupportedOS {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"EC2 instance is running an unsupported Operating System. Only Linux is supported.",
usertasks.AutoDiscoverEC2IssueSSMInstanceUnsupportedOS,
)
Expand All @@ -268,19 +276,29 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu

// instanceIDsSSMState contains a list of EC2 Instance IDs for a given state.
type instanceIDsSSMState struct {
valid []string
missing []string
connectionLost []string
unsupportedOS []string
valid map[string]string
missing map[string]string
connectionLost map[string]string
unsupportedOS map[string]string
}

func instanceIDsFrom(m map[string]string) []string {
return slices.Collect(maps.Keys(m))
}

// describeSSMAgentState returns the instanceIDsSSMState for all the instances.
func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunRequest, allInstanceIDs []string) (*instanceIDsSSMState, error) {
ret := &instanceIDsSSMState{}
func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunRequest, allInstances map[string]string) (*instanceIDsSSMState, error) {
ret := &instanceIDsSSMState{
valid: make(map[string]string),
missing: make(map[string]string),
connectionLost: make(map[string]string),
unsupportedOS: make(map[string]string),
}
instanceIDs := instanceIDsFrom(allInstances)

ssmInstancesInfo, err := req.SSM.DescribeInstanceInformationWithContext(ctx, &ssm.DescribeInstanceInformationInput{
Filters: []*ssm.InstanceInformationStringFilter{
{Key: aws.String(ssm.InstanceInformationFilterKeyInstanceIds), Values: aws.StringSlice(allInstanceIDs)},
{Key: aws.String(ssm.InstanceInformationFilterKeyInstanceIds), Values: aws.StringSlice(instanceIDs)},
},
MaxResults: aws.Int64(awsEC2APIChunkSize),
})
Expand All @@ -294,24 +312,24 @@ func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunReq
instanceStateByInstanceID[aws.StringValue(instanceState.InstanceId)] = instanceState
}

for _, instanceID := range allInstanceIDs {
for instanceID, instanceName := range allInstances {
instanceState, found := instanceStateByInstanceID[instanceID]
if !found {
ret.missing = append(ret.missing, instanceID)
ret.missing[instanceID] = instanceName
continue
}

if aws.StringValue(instanceState.PingStatus) == ssm.PingStatusConnectionLost {
ret.connectionLost = append(ret.connectionLost, instanceID)
ret.connectionLost[instanceID] = instanceName
continue
}

if aws.StringValue(instanceState.PlatformType) != ssm.PlatformTypeLinux {
ret.unsupportedOS = append(ret.unsupportedOS, instanceID)
ret.unsupportedOS[instanceID] = instanceName
continue
}

ret.valid = append(ret.valid, instanceID)
ret.valid[instanceID] = instanceName
}

return ret, nil
Expand All @@ -330,7 +348,7 @@ func skipAWSWaitErr(err error) error {
return trace.Wrap(err)
}

func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, commandID, instanceID *string) error {
func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, commandID, instanceID *string, instanceName string) error {
err := req.SSM.WaitUntilCommandExecutedWithContext(ctx, &ssm.GetCommandInvocationInput{
CommandId: commandID,
InstanceId: instanceID,
Expand Down Expand Up @@ -377,6 +395,7 @@ func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, com
IssueType: usertasks.AutoDiscoverEC2IssueSSMScriptFailure,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}))
}

Expand All @@ -393,6 +412,7 @@ func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, com
IssueType: usertasks.AutoDiscoverEC2IssueSSMScriptFailure,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}))
}
}
Expand Down
3 changes: 2 additions & 1 deletion lib/srv/server/ssm_install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func TestSSMInstaller(t *testing.T) {
name: "ssm run was successful",
req: SSMRunRequest{
Instances: []EC2Instance{
{InstanceID: "instance-id-1"},
{InstanceID: "instance-id-1", InstanceName: "my-instance-name"},
},
DocumentName: document,
Params: map[string]string{"token": "abcdefg"},
Expand Down Expand Up @@ -146,6 +146,7 @@ func TestSSMInstaller(t *testing.T) {
},
IssueType: "ec2-ssm-script-failure",
SSMDocumentName: "ssmdocument",
InstanceName: "my-instance-name",
}},
},
{
Expand Down
Loading