Skip to content

Commit

Permalink
Improve client tools host resolution (#50175)
Browse files Browse the repository at this point in the history
Host resolution performed because labels, fuzzy search, or predicate
expressions were supplied to commands that establish connections to
a single host has historically been performed client side in tsh.
While that works in most cases, it can prevent correctly
resolving hosts in some situations, i.e. when there are ambiguous
hosts and tsh is unaware that the cluster routing strategy is set
to ROUTE_TO_MOST_RECENT.

To improve the experience, a new ResolveSSHTarget was added to
Auth to allow host resolution to be performed server side. The
resolution works in a similar manner to, and was inspired by
GetSSHTargets. In the event that the new RPC is not implemented,
because the client is newer than Auth, tsh has also been updated
to pull the cluster networking config and address any host
ambiguity if allowed.

As a result tsh scp and tsh proxy ssh should be much more
tolerant to, and still permit access in situations where
ambiguous hosts are present for some amount of time. Prior to
this the only way to connect in these situations was to find
the UUID of the correct target instance and try again after
seeing an ambiguous host error.
  • Loading branch information
rosstimothy committed Jan 6, 2025
1 parent 7019692 commit 4266b5b
Show file tree
Hide file tree
Showing 21 changed files with 2,362 additions and 1,095 deletions.
6 changes: 6 additions & 0 deletions api/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4277,6 +4277,12 @@ func (c *Client) GetSSHTargets(ctx context.Context, req *proto.GetSSHTargetsRequ
return rsp, trace.Wrap(err)
}

// ResolveSSHTarget gets a server that would match an equivalent ssh dial request.
func (c *Client) ResolveSSHTarget(ctx context.Context, req *proto.ResolveSSHTargetRequest) (*proto.ResolveSSHTargetResponse, error) {
rsp, err := c.grpc.ResolveSSHTarget(ctx, req)
return rsp, trace.Wrap(err)
}

// CreateSessionTracker creates a tracker resource for an active session.
func (c *Client) CreateSessionTracker(ctx context.Context, st types.SessionTracker) (types.SessionTracker, error) {
v1, ok := st.(*types.SessionTrackerV1)
Expand Down
2,750 changes: 1,752 additions & 998 deletions api/client/proto/authservice.pb.go

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions api/proto/teleport/legacy/client/proto/authservice.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2034,6 +2034,36 @@ message ListResourcesRequest {
bool IncludeLogins = 13 [(gogoproto.jsontag) = "include_logins,omitempty"];
}

// ResolveSSHTargetRequest provides details about a server to be resolved in
// an equivalent manner to a ssh dial request.
//
// Resolution can happen in two modes:
// 1) searching for hosts based on labels, a predicate expression, or keywords
// 2) searching based on hostname
//
// If a Host is provided, resolution will only operate in the second mode and
// will not perform any resolution based on labels. In order to resolve via
// labels the Host must not be populated.
message ResolveSSHTargetRequest {
// The target host as would be sent to the proxy during a dial request.
string host = 1;
// The ssh port. This value is optional, and both empty string and "0" are typically
// treated as meaning that any port should match.
string port = 2;
// If not empty, a label-based matcher.
map<string, string> labels = 3;
// Boolean conditions that will be matched against the resource.
string predicate_expression = 4;
// A list of search keywords to match against resource field values.
repeated string search_keywords = 5;
}

// GetSSHTargetsResponse holds ssh servers that match an ssh targets request.
message ResolveSSHTargetResponse {
// The target matching the supplied request.
types.ServerV2 server = 1;
}

// GetSSHTargetsRequest gets all servers that might match an equivalent ssh dial request.
message GetSSHTargetsRequest {
// Host is the target host as would be sent to the proxy during a dial request.
Expand Down Expand Up @@ -3438,6 +3468,9 @@ service AuthService {
// but may result in confusing behavior if it is used outside of those contexts.
rpc GetSSHTargets(GetSSHTargetsRequest) returns (GetSSHTargetsResponse);

// ResolveSSHTarget returns the server that would be resolved in an equivalent ssh dial request.
rpc ResolveSSHTarget(ResolveSSHTargetRequest) returns (ResolveSSHTargetResponse);

// GetDomainName returns local auth domain of the current auth server
rpc GetDomainName(google.protobuf.Empty) returns (GetDomainNameResponse);
// GetClusterCACert returns the PEM-encoded TLS certs for the local cluster
Expand Down
8 changes: 8 additions & 0 deletions constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ package teleport
import (
"strings"
"time"

"github.com/gravitational/trace"
)

// WebAPIVersion is a current webapi version
Expand Down Expand Up @@ -815,9 +817,15 @@ const (
UsageWindowsDesktopOnly = "usage:windows_desktop"
)

// ErrNodeIsAmbiguous serves as an identifying error string indicating that
// the proxy subsystem found multiple nodes matching the specified hostname.
var ErrNodeIsAmbiguous = &trace.NotFoundError{Message: "ambiguous host could match multiple nodes"}

const (
// NodeIsAmbiguous serves as an identifying error string indicating that
// the proxy subsystem found multiple nodes matching the specified hostname.
// TODO(tross) DELETE IN v20.0.0
// Deprecated: Prefer using ErrNodeIsAmbiguous
NodeIsAmbiguous = "err-node-is-ambiguous"

// MaxLeases serves as an identifying error string indicating that the
Expand Down
4 changes: 1 addition & 3 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -872,9 +872,7 @@ func testUUIDBasedProxy(t *testing.T, suite *integrationTestSuite) {
// attempting to run a command by hostname should generate NodeIsAmbiguous error.
_, err = runCommand(t, teleportSvr, []string{"echo", "Hello there!"}, helpers.ClientConfig{Login: suite.Me.Username, Cluster: helpers.Site, Host: Host}, 1)
require.Error(t, err)
if !strings.Contains(err.Error(), teleport.NodeIsAmbiguous) {
require.FailNowf(t, "Expected %s, got %s", teleport.NodeIsAmbiguous, err.Error())
}
require.ErrorContains(t, err, "ambiguous")

// attempting to run a command by uuid should succeed.
_, err = runCommand(t, teleportSvr, []string{"echo", "Hello there!"}, helpers.ClientConfig{Login: suite.Me.Username, Cluster: helpers.Site, Host: uuid1}, 1)
Expand Down
99 changes: 93 additions & 6 deletions lib/auth/auth_with_roles.go
Original file line number Diff line number Diff line change
Expand Up @@ -1605,22 +1605,23 @@ func (a *ServerWithRoles) GetSSHTargets(ctx context.Context, req *proto.GetSSHTa
return nil, trace.Wrap(err)
}

lreq := proto.ListResourcesRequest{
ResourceType: types.KindNode,
lreq := &proto.ListUnifiedResourcesRequest{
Kinds: []string{types.KindNode},
SortBy: types.SortBy{Field: types.ResourceMetadataName},
UseSearchAsRoles: true,
}
var servers []*types.ServerV2
for {
// note that we're calling ServerWithRoles.ListResources here rather than some internal method. This method
// note that we're calling ServerWithRoles.ListUnifiedResources here rather than some internal method. This method
// delegates all RBAC filtering to ListResources, and then performs additional filtering on top of that.
lrsp, err := a.ListResources(ctx, lreq)
lrsp, err := a.ListUnifiedResources(ctx, lreq)
if err != nil {
return nil, trace.Wrap(err)
}

for _, rsc := range lrsp.Resources {
srv, ok := rsc.(*types.ServerV2)
if !ok {
srv := rsc.GetNode()
if srv == nil {
log.Warnf("Unexpected resource type %T, expected *types.ServerV2 (skipping)", rsc)
continue
}
Expand All @@ -1644,6 +1645,92 @@ func (a *ServerWithRoles) GetSSHTargets(ctx context.Context, req *proto.GetSSHTa
}, nil
}

// ResolveSSHTarget gets a server that would match an equivalent ssh dial request.
func (a *ServerWithRoles) ResolveSSHTarget(ctx context.Context, req *proto.ResolveSSHTargetRequest) (*proto.ResolveSSHTargetResponse, error) {
// try to detect case-insensitive routing setting, but default to false if we can't load
// networking config (equivalent to proxy routing behavior).
var routeToMostRecent bool
if cfg, err := a.authServer.GetReadOnlyClusterNetworkingConfig(ctx); err == nil {
routeToMostRecent = cfg.GetRoutingStrategy() == types.RoutingStrategy_MOST_RECENT
}

var servers []*types.ServerV2
switch {
case req.Host != "":
if len(req.Labels) > 0 || req.PredicateExpression != "" || len(req.SearchKeywords) > 0 {
log.Warn("ssh target resolution request contained both host and a resource matcher - ignoring resource matcher")
}

resp, err := a.GetSSHTargets(ctx, &proto.GetSSHTargetsRequest{
Host: req.Host,
Port: req.Port,
})
if err != nil {
return nil, trace.Wrap(err)
}

servers = resp.Servers
case len(req.Labels) > 0 || req.PredicateExpression != "" || len(req.SearchKeywords) > 0:
lreq := &proto.ListUnifiedResourcesRequest{
Kinds: []string{types.KindNode},
SortBy: types.SortBy{Field: types.ResourceMetadataName},
Labels: req.Labels,
PredicateExpression: req.PredicateExpression,
SearchKeywords: req.SearchKeywords,
}
for {
// note that we're calling ServerWithRoles.ListUnifiedResources here rather than some internal method. This method
// delegates all RBAC filtering to ListResources, and then performs additional filtering on top of that.
lrsp, err := a.ListUnifiedResources(ctx, lreq)
if err != nil {
return nil, trace.Wrap(err)
}

for _, rsc := range lrsp.Resources {
srv := rsc.GetNode()
if srv == nil {
log.Warnf("Unexpected resource type %T, expected *types.ServerV2 (skipping)", rsc)
continue
}

servers = append(servers, srv)
}

// If the routing strategy doesn't permit ambiguous matches, then abort
// early if more than one server has been found already
if !routeToMostRecent && len(servers) > 1 {
break
}

if lrsp.NextKey == "" || len(lrsp.Resources) == 0 {
break
}

lreq.StartKey = lrsp.NextKey

}
default:
return nil, trace.BadParameter("request did not contain any host information or resource matcher")
}

switch len(servers) {
case 1:
return &proto.ResolveSSHTargetResponse{Server: servers[0]}, nil
case 0:
return nil, trace.NotFound("no matching hosts")
default:
if !routeToMostRecent {
return nil, trace.Wrap(teleport.ErrNodeIsAmbiguous)
}

// Return the most recent version of the resource.
server := slices.MaxFunc(servers, func(a, b *types.ServerV2) int {
return a.Expiry().Compare(b.Expiry())
})
return &proto.ResolveSSHTargetResponse{Server: server}, nil
}
}

// ListResources returns a paginated list of resources filtered by user access.
func (a *ServerWithRoles) ListResources(ctx context.Context, req proto.ListResourcesRequest) (*types.ListResourcesResponse, error) {
// Check if auth server has a license for this resource type but only return an
Expand Down
3 changes: 3 additions & 0 deletions lib/auth/authclient/clt.go
Original file line number Diff line number Diff line change
Expand Up @@ -1759,6 +1759,9 @@ type ClientI interface {
// but may result in confusing behavior if it is used outside of those contexts.
GetSSHTargets(ctx context.Context, req *proto.GetSSHTargetsRequest) (*proto.GetSSHTargetsResponse, error)

// ResolveSSHTarget returns the server that would be resolved in an equivalent ssh dial request.
ResolveSSHTarget(ctx context.Context, req *proto.ResolveSSHTargetRequest) (*proto.ResolveSSHTargetResponse, error)

// PerformMFACeremony retrieves an MFA challenge from the server with the given challenge extensions
// and prompts the user to answer the challenge with the given promptOpts, and ultimately returning
// an MFA challenge response for the user.
Expand Down
15 changes: 15 additions & 0 deletions lib/auth/grpcserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -4247,6 +4247,21 @@ func (g *GRPCServer) GetSSHTargets(ctx context.Context, req *authpb.GetSSHTarget
return rsp, nil
}

// ResolveSSHTarget gets a server that would match an equivalent ssh dial request.
func (g *GRPCServer) ResolveSSHTarget(ctx context.Context, req *authpb.ResolveSSHTargetRequest) (*authpb.ResolveSSHTargetResponse, error) {
auth, err := g.authenticate(ctx)
if err != nil {
return nil, trace.Wrap(err)
}

rsp, err := auth.ServerWithRoles.ResolveSSHTarget(ctx, req)
if err != nil {
return nil, trace.Wrap(err)
}

return rsp, nil
}

// CreateSessionTracker creates a tracker resource for an active session.
func (g *GRPCServer) CreateSessionTracker(ctx context.Context, req *authpb.CreateSessionTrackerRequest) (*types.SessionTrackerV1, error) {
auth, err := g.authenticate(ctx)
Expand Down
63 changes: 63 additions & 0 deletions lib/auth/grpcserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2762,6 +2762,69 @@ func TestGetSSHTargets(t *testing.T) {
require.ElementsMatch(t, []string{rsp.Servers[0].GetHostname(), rsp.Servers[1].GetHostname()}, []string{"foo", "Foo"})
}

func TestResolveSSHTarget(t *testing.T) {
t.Parallel()
ctx := context.Background()
srv := newTestTLSServer(t)

clt, err := srv.NewClient(TestAdmin())
require.NoError(t, err)

upper, err := types.NewServerWithLabels(uuid.New().String(), types.KindNode, types.ServerSpecV2{
Hostname: "Foo",
UseTunnel: true,
}, nil)
require.NoError(t, err)
upper.SetExpiry(time.Now().Add(time.Hour))

lower, err := types.NewServerWithLabels(uuid.New().String(), types.KindNode, types.ServerSpecV2{
Hostname: "foo",
UseTunnel: true,
}, nil)
require.NoError(t, err)

other, err := types.NewServerWithLabels(uuid.New().String(), types.KindNode, types.ServerSpecV2{
Hostname: "bar",
UseTunnel: true,
}, nil)
require.NoError(t, err)

for _, node := range []types.Server{upper, lower, other} {
_, err = clt.UpsertNode(ctx, node)
require.NoError(t, err)
}

rsp, err := clt.ResolveSSHTarget(ctx, &proto.ResolveSSHTargetRequest{
Host: "foo",
Port: "0",
})
require.NoError(t, err)
require.Equal(t, "foo", rsp.Server.GetHostname())

cnc := types.DefaultClusterNetworkingConfig()
cnc.SetCaseInsensitiveRouting(true)
_, err = clt.UpsertClusterNetworkingConfig(ctx, cnc)
require.NoError(t, err)

rsp, err = clt.ResolveSSHTarget(ctx, &proto.ResolveSSHTargetRequest{
Host: "foo",
Port: "0",
})
require.Error(t, err)
require.Nil(t, rsp)

cnc.SetRoutingStrategy(types.RoutingStrategy_MOST_RECENT)
_, err = clt.UpsertClusterNetworkingConfig(ctx, cnc)
require.NoError(t, err)

rsp, err = clt.ResolveSSHTarget(ctx, &proto.ResolveSSHTargetRequest{
Host: "foo",
Port: "0",
})
require.NoError(t, err)
require.Equal(t, "Foo", rsp.Server.GetHostname())
}

func TestNodesCRUD(t *testing.T) {
t.Parallel()
ctx := context.Background()
Expand Down
Loading

0 comments on commit 4266b5b

Please sign in to comment.