Skip to content

Commit

Permalink
refactor/new metrics (#11)
Browse files Browse the repository at this point in the history
* refactor-faults-metrics
* implement listiscsisession api call
  • Loading branch information
mjavier2k authored Mar 17, 2020
1 parent c8e1779 commit c76c8ae
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 87 deletions.
77 changes: 46 additions & 31 deletions pkg/prom/collector.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package prom

import (
"fmt"
"math"
"strconv"
"strings"
Expand Down Expand Up @@ -94,10 +95,7 @@ func (c *solidfireCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- MetricDescriptions.ClusterCapacityCompressionFactor
ch <- MetricDescriptions.ClusterCapacityEfficiencyFactor

ch <- MetricDescriptions.ClusterActiveFaultsBestPractice
ch <- MetricDescriptions.ClusterActiveFaultsWarning
ch <- MetricDescriptions.ClusterActiveFaultsError
ch <- MetricDescriptions.ClusterActiveFaultsCritical
ch <- MetricDescriptions.ClusterActiveFaults

ch <- MetricDescriptions.NodeStatsCBytesIn
ch <- MetricDescriptions.NodeStatsCBytesOut
Expand Down Expand Up @@ -164,6 +162,7 @@ func (c *solidfireCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- MetricDescriptions.ListDrivesStatus
ch <- MetricDescriptions.ListDrivesCapacity

ch <- MetricDescriptions.NodeISCSISessionsTotal
}

func (c *solidfireCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down Expand Up @@ -526,37 +525,24 @@ func (c *solidfireCollector) Collect(ch chan<- prometheus.Metric) {
scrapeSuccess = 0
log.Errorln(err)
}
severity := map[string]float64{
solidfire.FaultBestPractice: 0,
solidfire.FaultWarning: 0,
solidfire.FaultError: 0,
solidfire.FaultCritical: 0,
}

for _, f := range ClusterActiveFaults.Result.Faults {
severity[f.Severity]++
ch <- prometheus.MustNewConstMetric(
MetricDescriptions.ClusterActiveFaults,
prometheus.GaugeValue,
1,
strconv.Itoa(f.NodeID),
nodesNamesByID[f.NodeID],
f.Code,
f.Severity,
f.Type,
fmt.Sprintf("%f", f.ServiceID),
strconv.FormatBool(f.Resolved),
fmt.Sprintf("%f", f.NodeHardwareFaultID),
fmt.Sprintf("%f", f.DriveID),
)
}

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.ClusterActiveFaultsBestPractice,
prometheus.GaugeValue,
severity[solidfire.FaultBestPractice])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.ClusterActiveFaultsWarning,
prometheus.GaugeValue,
severity[solidfire.FaultWarning])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.ClusterActiveFaultsError,
prometheus.GaugeValue,
severity[solidfire.FaultError])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.ClusterActiveFaultsCritical,
prometheus.GaugeValue,
severity[solidfire.FaultCritical])

// List Cluster Stats
ClusterNodeStats, err := c.client.ListNodeStats()
if err != nil {
Expand Down Expand Up @@ -1183,6 +1169,35 @@ func (c *solidfireCollector) Collect(ch chan<- prometheus.Metric) {
)
}

ListISCSISessions, err := c.client.ListISCSISessions()
if err != nil {
scrapeSuccess = 0
log.Errorln(err)
}

sessions := make(map[int]map[int]float64)

for _, session := range ListISCSISessions.Result.Sessions {
if sessions[session.NodeID] == nil {
sessions[session.NodeID] = make(map[int]float64)
}
sessions[session.NodeID][session.VolumeID]++
}

for node, v := range sessions {
for vol, val := range v {
ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeISCSISessionsTotal,
prometheus.GaugeValue,
val,
strconv.Itoa(node),
nodesNamesByID[node],
strconv.Itoa(vol),
volumeNamesByID[vol],
)
}
}

// Set scrape success metric to scrapeSuccess
ch <- prometheus.MustNewConstMetric(MetricDescriptions.ScrapeSuccessDesc, prometheus.GaugeValue, scrapeSuccess)
}
Expand Down
43 changes: 14 additions & 29 deletions pkg/prom/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,7 @@ type Descriptions struct {
ClusterCapacityEfficiencyFactor *prometheus.Desc

// ListClusterFaults
ClusterActiveFaultsWarning *prometheus.Desc
ClusterActiveFaultsError *prometheus.Desc
ClusterActiveFaultsCritical *prometheus.Desc
ClusterActiveFaultsBestPractice *prometheus.Desc
ClusterActiveFaults *prometheus.Desc

// ListNodeStats
NodeStatsCBytesIn *prometheus.Desc
Expand Down Expand Up @@ -145,6 +142,8 @@ type Descriptions struct {

ListDrivesStatus *prometheus.Desc
ListDrivesCapacity *prometheus.Desc

NodeISCSISessionsTotal *prometheus.Desc
}

func NewMetricDescriptions(namespace string) *Descriptions {
Expand Down Expand Up @@ -482,31 +481,10 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.ClusterActiveFaultsWarning = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cluster_active_faults_warning"),
"The total number of warning faults in the system",
nil,
nil,
)

d.ClusterActiveFaultsError = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cluster_active_faults_error"),
"The total number of error faults in the system",
nil,
nil,
)

d.ClusterActiveFaultsCritical = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cluster_active_faults_critical"),
"The total number of critical faults in the system",
nil,
nil,
)

d.ClusterActiveFaultsBestPractice = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cluster_active_faults_best_practice"),
"The total number of best practice faults in the system",
nil,
d.ClusterActiveFaults = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cluster_active_faults"),
"List of any active faults detected on the cluster",
[]string{"node_id", "node_name", "code", "severity", "type", "service_id", "resolved", "node_hardware_fault_id", "drive_id"},
nil,
)

Expand Down Expand Up @@ -972,5 +950,12 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.NodeISCSISessionsTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_iscsi_sessions_total"),
"The total number of iscsi sessions per node and volume",
[]string{"node_id", "node_name", "volume_id", "volume_name"},
nil,
)

return &d
}
8 changes: 0 additions & 8 deletions pkg/solidfire/const.go

This file was deleted.

22 changes: 22 additions & 0 deletions pkg/solidfire/solidfire.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,25 @@ func (s *Client) ListDrives() (ListDrivesResponse, error) {
}
return r, nil
}

// ListISCSISessions invokes the "ListISCSISessions" RPC method and decodes
// the reply into a ListISCSISessionsResponse.
//
// A non-nil error is returned when the request payload cannot be marshalled,
// when doRpcCall fails, or when the response body cannot be unmarshalled into
// the response type. The returned response should not be relied upon when the
// error is non-nil.
func (s *Client) ListISCSISessions() (ListISCSISessionsResponse, error) {
	r := ListISCSISessionsResponse{}

	payload := &RPCBody{
		Method: "ListISCSISessions",
		Params: ListISCSISessionsParams{},
		ID:     1,
	}

	// Fail fast on a marshal error instead of letting the next assignment to
	// err overwrite it and sending an empty payload.
	payloadBytes, err := json.Marshal(payload)
	if err != nil {
		return r, err
	}

	bodyBytes, err := doRpcCall(s, payloadBytes)
	if err != nil {
		return r, err
	}

	if err := json.Unmarshal(bodyBytes, &r); err != nil {
		return r, err
	}
	return r, nil
}
25 changes: 6 additions & 19 deletions pkg/solidfire/solidfire_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func TestClient_ListClusterActiveFaults(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
//gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand All @@ -190,19 +190,6 @@ func TestClient_ListClusterActiveFaults(t *testing.T) {
gotRaw, err := sfClient.ListClusterActiveFaults()
got := gotRaw.Result.Faults[0].ClusterFaultID

severity := map[string]float64{
solidfire.FaultBestPractice: 0,
solidfire.FaultWarning: 0,
solidfire.FaultError: 0,
solidfire.FaultCritical: 0,
}

for _, f := range gotRaw.Result.Faults {
severity[f.Severity]++
}

fmt.Printf("%v", severity)

if (err != nil) != tt.wantErr {
t.Errorf("Client.ListClusterActiveFaults() error = %v, wantErr %v", err, tt.wantErr)
return
Expand Down Expand Up @@ -234,7 +221,7 @@ func TestClient_ListNodeStats(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand Down Expand Up @@ -279,7 +266,7 @@ func TestClient_ListVolumeQoSHistograms(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand Down Expand Up @@ -326,7 +313,7 @@ func TestClient_ListAllNodes(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand Down Expand Up @@ -371,7 +358,7 @@ func TestClient_GetClusterStats(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand Down Expand Up @@ -416,7 +403,7 @@ func TestClient_GetClusterFullThreshold(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
defer gock.Off()
gock.Observe(gock.DumpRequest)
// gock.Observe(gock.DumpRequest)
gock.New(sfHost).
Post(sfRPCEndpoint).
MatchType("json").
Expand Down
33 changes: 33 additions & 0 deletions pkg/solidfire/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ type ListDrivesParams struct {
// No params needed
}

// ListISCSISessionsParams is the parameter object for the ListISCSISessions
// RPC method. The call needs no parameters, so the struct has no fields.
type ListISCSISessionsParams struct{}

type ListVolumesResponse struct {
ID int `json:"id"`
Result struct {
Expand Down Expand Up @@ -408,3 +412,32 @@ type ListDrivesResponse struct {
} `json:"drives"`
} `json:"result"`
}

// ListISCSISessionsResponse is the decoded reply of the ListISCSISessions RPC
// method: the "id" field of the reply plus the session list found under
// "result.sessions".
type ListISCSISessionsResponse struct {
ID int `json:"id"`
Result struct {
// Sessions holds one element per iSCSI session in the reply.
Sessions []struct {
AccountID int `json:"accountID"`
AccountName string `json:"accountName"`
// CreateTime is decoded by time.Time's JSON support, which expects an
// RFC 3339 timestamp string.
CreateTime time.Time `json:"createTime"`
DriveID int `json:"driveID"`
DriveIDs []int `json:"driveIDs"`
// Initiator is left untyped; its concrete shape is not fixed by this type.
Initiator interface{} `json:"initiator"`
InitiatorIP string `json:"initiatorIP"`
InitiatorName string `json:"initiatorName"`
InitiatorPortName string `json:"initiatorPortName"`
InitiatorSessionID float64 `json:"initiatorSessionID"`
MsSinceLastIscsiPDU int `json:"msSinceLastIscsiPDU"`
MsSinceLastScsiCommand int `json:"msSinceLastScsiCommand"`
// NodeID and VolumeID (below) presumably identify the node and volume the
// session is attached to; confirm against the API reference.
NodeID int `json:"nodeID"`
ServiceID int `json:"serviceID"`
SessionID int64 `json:"sessionID"`
TargetIP string `json:"targetIP"`
TargetName string `json:"targetName"`
TargetPortName string `json:"targetPortName"`
VirtualNetworkID int `json:"virtualNetworkID"`
VolumeID int `json:"volumeID"`
VolumeInstance int64 `json:"volumeInstance"`
} `json:"sessions"`
} `json:"result"`
}
32 changes: 32 additions & 0 deletions test/fixtures/listiscsisessions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"id": 1,
"result": {
"sessions": [
{
"accountID": 1,
"accountName": "account1",
"createTime": "2017-04-11T03:39:32.030291Z",
"driveID": 23,
"driveIDs": [
23
],
"initiator": null,
"initiatorIP": "10.1.1.1:37138",
"initiatorName": "iqn.2010-01.net.solidfire.eng:c",
"initiatorPortName": "iqn.2010-01.net.solidfire.eng:c,i, 0x23d860000",
"initiatorSessionID": 9622126592,
"msSinceLastIscsiPDU": 243,
"msSinceLastScsiCommand": 141535021,
"nodeID": 3,
"serviceID": 6,
"sessionID": 25769804943,
"targetIP": "10.1.1.2:3260",
"targetName": "iqn.2010-01.com.solidfire:a7sd.3",
"targetPortName": "iqn.2010-01.com.solidfire:a7sd.3,t,0x1",
"virtualNetworkID": 0,
"volumeID": 3,
"volumeInstance": 140327214758656
}
]
}
}

0 comments on commit c76c8ae

Please sign in to comment.