Skip to content

Commit

Permalink
Return num_servers_by_servable_model_path in Admin.Stats.
Browse files Browse the repository at this point in the history
This counts the maximum number of replicas that can be used to serve a servable model path. Clients can use this field to calculate their expected serving capacity.

PiperOrigin-RevId: 579106599
Change-Id: I34535da164d7729054d02bc08f606013312f67bc
  • Loading branch information
Sax Authors authored and copybara-github committed Nov 3, 2023
1 parent 2b4e72c commit dde398f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
6 changes: 5 additions & 1 deletion saxml/admin/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,14 @@ func (s *Server) Stats(ctx context.Context, in *pb.StatsRequest) (*pb.StatsRespo
chipTopo pb.ModelServer_ChipTopology
}
replicasPerServerType := make(map[ServerType]int32)
replicasPerServableModelPath := make(map[string]int32)

for _, server := range servers {
serverType := ServerType{server.GetModelServer().GetChipType(), server.GetModelServer().GetChipTopology()}
replicasPerServerType[serverType] = replicasPerServerType[serverType] + 1
for _, servableModelPath := range server.GetModelServer().GetServableModelPaths() {
replicasPerServableModelPath[servableModelPath] = replicasPerServableModelPath[servableModelPath] + 1
}
}

modelServerTypeStats := []*pb.ModelServerTypeStat{}
Expand All @@ -230,7 +234,7 @@ func (s *Server) Stats(ctx context.Context, in *pb.StatsRequest) (*pb.StatsRespo
NumReplicas: replicas,
})
}
return &pb.StatsResponse{ModelServerTypeStats: modelServerTypeStats}, nil
return &pb.StatsResponse{ModelServerTypeStats: modelServerTypeStats, NumServersByServableModelPath: replicasPerServableModelPath}, nil
}

// WatchLoc handles WatchLoc RPC requests.
Expand Down
5 changes: 5 additions & 0 deletions saxml/protobuf/admin.proto
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ message StatsRequest {

// StatsResponse is the response returned by the Admin Stats RPC.
message StatsResponse {
// Replica counts grouped by model server type.
repeated ModelServerTypeStat model_server_type_stats = 1;

// Maps each servable model path to the number of model servers that list
// that path among their servable model paths. This is the maximum number of
// servers that can be used to serve the path, not the number currently
// serving it. Clients can use this field to calculate their expected
// serving capacity.
map<string, int32> num_servers_by_servable_model_path = 2;
}

message WatchLocRequest {
Expand Down

0 comments on commit dde398f

Please sign in to comment.