Skip to content

Commit

Permalink
fix: Remove confused metrics
Browse files Browse the repository at this point in the history
The exporter presently has metrics that are nonsense for a given type of
drive, and remain at zero due to their defaults.

Change the behavior to NOT emit a metric if the underlying JSON field is
not present.

Future related work may include parsing the corresponding metrics for
SATA/SAS SSDs (e.g. `smartctl_device_percentage_used` could derived from
`SSD_Life_Left` on some drives).

Metrics no longer exported for the wrong type of drive:
- `smartctl_device_nvme_capacity_bytes` (NVME-specific)
- `smartctl_device_available_spare` (NVME-specific, ATA possible)
- `smartctl_device_available_spare_threshold` (NVME-specific, ATA
  possible)
- `smartctl_device_critical_warning` (NVME-specific, ATA possible)
- `smartctl_device_interface_speed` (ATA-specific)
- `smartctl_device_media_errors` (NVME-specific, ATA possible)
- `smartctl_device_num_err_log_entries` (NVME-specific, SCSI uses
  distinct metrics, ATA possible)
- `smartctl_device_nvme_capacity_bytes` (NVME-specific)
- `smartctl_device_percentage_used` (NVME-specific, ATA possible)

Signed-off-by: Robin H. Johnson <[email protected]>
  • Loading branch information
robbat2 committed Oct 16, 2023
1 parent 558a760 commit d90594a
Showing 1 changed file with 93 additions and 49 deletions.
142 changes: 93 additions & 49 deletions smartctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ type SMARTDevice struct {
serial string
family string
model string
// These are used to select types of metrics.
interface_ string
protocol string
}

// SMARTctl object
Expand All @@ -41,15 +44,26 @@ type SMARTctl struct {

// NewSMARTctl is smartctl constructor
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
var model_name string
if obj := json.Get("model_name"); obj.Exists() {
model_name = obj.String()
}
// If the drive returns an empty model name, replace that with unknown.
if model_name == "" {
model_name = "unknown"
}

return SMARTctl{
ch: ch,
json: json,
logger: logger,
device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(json.Get("model_name").String()),
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(model_name),
interface_: strings.TrimSpace(json.Get("device.type").String()),
protocol: strings.TrimSpace(json.Get("device.protocol").String()),
},
}
}
Expand All @@ -66,23 +80,29 @@ func (smart *SMARTctl) Collect() {
smart.minePowerOnSeconds()
smart.mineRotationRate()
smart.mineTemperatures()
smart.minePowerCycleCount()
smart.minePowerCycleCount() // ATA/SATA, NVME, SCSI, SAS
smart.mineDeviceSCTStatus()
smart.mineDeviceStatistics()
smart.mineDeviceErrorLog()
smart.mineDeviceSelfTestLog()
smart.mineDeviceERC()
smart.mineNvmePercentageUsed()
smart.mineNvmeAvailableSpare()
smart.mineNvmeAvailableSpareThreshold()
smart.mineNvmeCriticalWarning()
smart.mineNvmeMediaErrors()
smart.mineNvmeNumErrLogEntries()
smart.mineNvmeBytesRead()
smart.mineNvmeBytesWritten()
smart.mineSmartStatus()
smart.mineSCSIGrownDefectList()
smart.mineSCSIErrorCounterLog()

if smart.device.interface_ == "nvme" {
smart.mineNvmePercentageUsed()
smart.mineNvmeAvailableSpare()
smart.mineNvmeAvailableSpareThreshold()
smart.mineNvmeCriticalWarning()
smart.mineNvmeMediaErrors()
smart.mineNvmeNumErrLogEntries()
smart.mineNvmeBytesRead()
smart.mineNvmeBytesWritten()
}
// SCSI, SAS
if smart.device.interface_ == "scsi" {
smart.mineSCSIGrownDefectList()
smart.mineSCSIErrorCounterLog()
}
}

func (smart *SMARTctl) mineExitStatus() {
Expand All @@ -95,14 +115,13 @@ func (smart *SMARTctl) mineExitStatus() {
}

func (smart *SMARTctl) mineDevice() {
device := smart.json.Get("device")
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceModel,
prometheus.GaugeValue,
1,
smart.device.device,
device.Get("type").String(),
device.Get("protocol").String(),
smart.device.interface_,
smart.device.protocol,
smart.device.family,
smart.device.model,
smart.device.serial,
Expand Down Expand Up @@ -130,12 +149,15 @@ func (smart *SMARTctl) mineCapacity() {
smart.json.Get("user_capacity.bytes").Float(),
smart.device.device,
)
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceTotalCapacityBytes,
prometheus.GaugeValue,
smart.json.Get("nvme_total_capacity").Float(),
smart.device.device,
)
nvme_total_capacity := smart.json.Get("nvme_total_capacity")
if nvme_total_capacity.Exists() {
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceTotalCapacityBytes,
prometheus.GaugeValue,
nvme_total_capacity.Float(),
smart.device.device,
)
}
}

func (smart *SMARTctl) mineBlockSize() {
Expand All @@ -152,15 +174,19 @@ func (smart *SMARTctl) mineBlockSize() {

func (smart *SMARTctl) mineInterfaceSpeed() {
iSpeed := smart.json.Get("interface_speed")
for _, speedType := range []string{"max", "current"} {
tSpeed := iSpeed.Get(speedType)
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceInterfaceSpeed,
prometheus.GaugeValue,
tSpeed.Get("units_per_second").Float()*tSpeed.Get("bits_per_unit").Float(),
smart.device.device,
speedType,
)
if iSpeed.Exists() {
for _, speedType := range []string{"max", "current"} {
tSpeed := iSpeed.Get(speedType)
if tSpeed.Exists() {
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceInterfaceSpeed,
prometheus.GaugeValue,
tSpeed.Get("units_per_second").Float()*tSpeed.Get("bits_per_unit").Float(),
smart.device.device,
speedType,
)
}
}
}
}

Expand Down Expand Up @@ -200,16 +226,21 @@ func (smart *SMARTctl) mineDeviceAttribute() {

func (smart *SMARTctl) minePowerOnSeconds() {
pot := smart.json.Get("power_on_time")
smart.ch <- prometheus.MustNewConstMetric(
metricDevicePowerOnSeconds,
prometheus.CounterValue,
GetFloatIfExists(pot, "hours", 0)*60*60+GetFloatIfExists(pot, "minutes", 0)*60,
smart.device.device,
)
// If the power_on_time is NOT present, do not report as 0.
if pot.Exists() {
smart.ch <- prometheus.MustNewConstMetric(
metricDevicePowerOnSeconds,
prometheus.CounterValue,
GetFloatIfExists(pot, "hours", 0)*60*60+GetFloatIfExists(pot, "minutes", 0)*60,
smart.device.device,
)
}
}

func (smart *SMARTctl) mineRotationRate() {
rRate := GetFloatIfExists(smart.json, "rotation_rate", 0)
// TODO: what should be done if this is absent vs really zero (for
// solid-state drives)?
if rRate > 0 {
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceRotationRate,
Expand Down Expand Up @@ -237,12 +268,17 @@ func (smart *SMARTctl) mineTemperatures() {
}

func (smart *SMARTctl) minePowerCycleCount() {
smart.ch <- prometheus.MustNewConstMetric(
metricDevicePowerCycleCount,
prometheus.CounterValue,
smart.json.Get("power_cycle_count").Float(),
smart.device.device,
)
// ATA & NVME
powerCycleCount := smart.json.Get("power_cycle_count")
if powerCycleCount.Exists() {
smart.ch <- prometheus.MustNewConstMetric(
metricDevicePowerCycleCount,
prometheus.CounterValue,
powerCycleCount.Float(),
smart.device.device,
)
return
}
}

func (smart *SMARTctl) mineDeviceSCTStatus() {
Expand Down Expand Up @@ -312,25 +348,33 @@ func (smart *SMARTctl) mineNvmeNumErrLogEntries() {
}

func (smart *SMARTctl) mineNvmeBytesRead() {
blockSize := smart.json.Get("logical_block_size").Float()
blockSize := smart.json.Get("logical_block_size")
data_units_read := smart.json.Get("nvme_smart_health_information_log.data_units_read")
if !blockSize.Exists() || !data_units_read.Exists() {
return
}
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceBytesRead,
prometheus.CounterValue,
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
smart.json.Get("nvme_smart_health_information_log.data_units_read").Float()*1000.0*blockSize,
data_units_read.Float()*1000.0*blockSize.Float(),
smart.device.device,
)
}

func (smart *SMARTctl) mineNvmeBytesWritten() {
blockSize := smart.json.Get("logical_block_size").Float()
blockSize := smart.json.Get("logical_block_size")
data_units_written := smart.json.Get("nvme_smart_health_information_log.data_units_written")
if !blockSize.Exists() || !data_units_written.Exists() {
return
}
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceBytesWritten,
prometheus.CounterValue,
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
smart.json.Get("nvme_smart_health_information_log.data_units_written").Float()*1000.0*blockSize,
data_units_written.Float()*1000.0*blockSize.Float(),
smart.device.device,
)
}
Expand Down

0 comments on commit d90594a

Please sign in to comment.