diff --git a/monitor-agent/node_exporter/collector/monitor_log_metric_linux.go b/monitor-agent/node_exporter/collector/monitor_log_metric_linux.go index fef5005c0..21b9cf5c6 100644 --- a/monitor-agent/node_exporter/collector/monitor_log_metric_linux.go +++ b/monitor-agent/node_exporter/collector/monitor_log_metric_linux.go @@ -94,6 +94,7 @@ type logMetricMonitorNeObj struct { TailLastUnixTime int64 `json:"-"` DestroyChan chan int `json:"-"` TailDataCancelChan chan int `json:"-"` + MultiPathNum int `json:"-"` } type logMetricGroupNeObj struct { @@ -359,7 +360,7 @@ func (c *logMetricMonitorNeObj) start() { } if reopenFlag { level.Info(monitorLogger).Log("log_metric -> reopen", fmt.Sprintf("path:%s,serviceGroup:%s", c.Path, c.ServiceGroup)) - time.Sleep(2 * time.Second) + time.Sleep(500 * time.Millisecond) go c.start() } } @@ -398,6 +399,7 @@ func (c *logMetricMonitorNeObj) startMultiPath() { level.Warn(monitorLogger).Log("log_metric -> startMultiPath_cannotMatchAnyFile", fmt.Sprintf("path:%s,serviceGroup:%s", c.Path, c.ServiceGroup)) return } + c.MultiPathNum = len(pathList) go c.startHandleTailData() var destroyChanList []chan int for _, targetFilePath := range pathList { @@ -484,8 +486,9 @@ func (c *logMetricMonitorNeObj) new(input *logMetricMonitorNeObj) { // 把所有正则初始化 func (c *logMetricMonitorNeObj) update(input *logMetricMonitorNeObj) { + level.Info(monitorLogger).Log("do updateLogMetricMonitorNeObj", c.Path) c.Lock.Lock() - level.Info(monitorLogger).Log("updateLogMetricMonitorNeObj", c.Path) + level.Info(monitorLogger).Log("start updateLogMetricMonitorNeObj", c.Path) newJsonConfigList := []*logMetricJsonNeObj{} var err error for _, jsonObj := range input.JsonConfig { @@ -537,11 +540,15 @@ func (c *logMetricMonitorNeObj) update(input *logMetricMonitorNeObj) { c.TargetEndpoint = input.TargetEndpoint c.ServiceGroup = input.ServiceGroup c.MetricGroupConfig = newMetricGroupList - level.Info(monitorLogger).Log("MetricGroupConfig: ", fmt.Sprintf("len:%d", len(c.MetricGroupConfig))) + level.Info(monitorLogger).Log("updateLogMetricMonitorNeObj_MetricGroupConfig: ", fmt.Sprintf("len:%d", len(c.MetricGroupConfig))) c.Lock.Unlock() if strings.Contains(c.Path, "*") { - c.DestroyChan <- 1 - time.Sleep(2 * time.Second) + if c.MultiPathNum > 0 { + level.Info(monitorLogger).Log("start_updateLogMetricMonitorNeObj_destroy_*: ", c.Path) + c.DestroyChan <- 1 + level.Info(monitorLogger).Log("end_updateLogMetricMonitorNeObj_destroy_*: ", c.Path) + } + time.Sleep(500 * time.Millisecond) go c.startMultiPath() } } @@ -679,7 +686,9 @@ func LogMetricMonitorHandleAction(requestParamBuff []byte) error { if len(deletePathMap) > 0 && len(tmpLogMetricObjJobs) > 0 { for _, existJob := range tmpLogMetricObjJobs { if _, ok := deletePathMap[existJob.Path]; ok { - existJob.ReOpenHandlerChan <- 1 + if !strings.Contains(existJob.Path, "*") { + existJob.ReOpenHandlerChan <- 1 + } } } } diff --git a/monitor-server/services/db/log_metric.go b/monitor-server/services/db/log_metric.go index b7ce368ea..4829fbbb3 100644 --- a/monitor-server/services/db/log_metric.go +++ b/monitor-server/services/db/log_metric.go @@ -590,8 +590,8 @@ func getLogMetricRatePromExpr(metric, metricPrefix, aggType, serviceGroup, sucRe if metric == "req_fail_rate" { //result = fmt.Sprintf("100-100*((sum(%s{key=\"%sreq_suc_count\",agg=\"%s\",service_group=\"%s\",retcode=\"%s\",code=\"$t_code\"}) by (service_group,code))/(sum(%s{key=\"%sreq_count\",agg=\"%s\",service_group=\"%s\",code=\"$t_code\"}) by (service_group,code)) > 0 or vector(1))", // models.LogMetricName, metricPrefix, aggType, serviceGroup, sucRetCode, models.LogMetricName, metricPrefix, aggType, serviceGroup) - result = fmt.Sprintf("100-100*((sum(%s{key=\"%sreq_suc_count\",agg=\"%s\",service_group=\"%s\",retcode=\"%s\",code=\"$t_code\"}) by (service_group,code) > 0 or (sum(%s{key=\"%sreq_suc_count\",agg=\"%s\",service_group=\"%s\",retcode=\"%s\",code=\"$t_code\"}) by (service_group,code) + 1))/(sum(%s{key=\"%sreq_count\",agg=\"%s\",service_group=\"%s\",code=\"$t_code\"}) by (service_group,code) > 0 or (sum(%s{key=\"%sreq_count\",agg=\"%s\",service_group=\"%s\",code=\"$t_code\"}) by (service_group,code) + 1)))", - models.LogMetricName, metricPrefix, aggType, serviceGroup, sucRetCode, models.LogMetricName, metricPrefix, aggType, serviceGroup, sucRetCode, models.LogMetricName, metricPrefix, aggType, serviceGroup, models.LogMetricName, metricPrefix, aggType, serviceGroup) + result = fmt.Sprintf("100*((sum(%s{key=\"%sreq_suc_count\",agg=\"%s\",service_group=\"%s\",retcode!=\"%s\",code=\"$t_code\"}) by (service_group,code))/(sum(%s{key=\"%sreq_count\",agg=\"%s\",service_group=\"%s\",code=\"$t_code\"}) by (service_group,code) > 0 or (sum(%s{key=\"%sreq_count\",agg=\"%s\",service_group=\"%s\",code=\"$t_code\"}) by (service_group,code) + 1)))", + models.LogMetricName, metricPrefix, aggType, serviceGroup, sucRetCode, models.LogMetricName, metricPrefix, aggType, serviceGroup, models.LogMetricName, metricPrefix, aggType, serviceGroup) } return diff --git a/monitor-ui/src/views/monitor-config/log-template-config/json-regex.vue b/monitor-ui/src/views/monitor-config/log-template-config/json-regex.vue index 366eca163..2e2c8a847 100644 --- a/monitor-ui/src/views/monitor-config/log-template-config/json-regex.vue +++ b/monitor-ui/src/views/monitor-config/log-template-config/json-regex.vue @@ -168,10 +168,10 @@ import Vue from 'vue' import {thresholdList, lastList} from '@/assets/config/common-config.js' const initRangeConfigMap = { - req_suc_rate: { - operator: '<', - threshold: '90', - time: '60', + req_fail_rate: { + operator: '>', + threshold: '10', + time: '0', time_unit: 's' }, req_costtime_avg: { @@ -332,7 +332,7 @@ export default { { if (!val) { - const key = ['req_suc_rate', 'req_costtime_avg'].includes(params.row.metric) ? params.row.metric : 'other' + const key = ['req_fail_rate', 'req_costtime_avg'].includes(params.row.metric) ? params.row.metric : 'other' Vue.set(this.configInfo.metric_list[params.index], 'range_config', cloneDeep(initRangeConfigMap[key])) } this.configInfo.metric_list[params.index].auto_alarm = val @@ -520,8 +520,8 @@ export default { 'code' ], color_group: '#20a162', - auto_alarm: true, - range_config: cloneDeep(initRangeConfigMap.req_suc_rate) + auto_alarm: false, + range_config: cloneDeep(initRangeConfigMap.other) }, { log_param_name: 'code', @@ -539,13 +539,13 @@ export default { log_param_name: 'code', metric: 'req_fail_rate', display_name: this.$t('m_failure_rate'), - agg_type: '100-100*{req_suc_count}/{req_count}', + agg_type: '100*{req_fail_count_detail}/{req_count}', tag_config: [ 'code' ], color_group: '#7c1823', - auto_alarm: false, - range_config: cloneDeep(initRangeConfigMap.other) + auto_alarm: true, + range_config: cloneDeep(initRangeConfigMap.req_fail_rate) }, { log_param_name: 'code', diff --git a/monitor-ui/src/views/monitor-config/log-template-config/standard-regex.vue b/monitor-ui/src/views/monitor-config/log-template-config/standard-regex.vue index c19a40c6c..043c97cdc 100644 --- a/monitor-ui/src/views/monitor-config/log-template-config/standard-regex.vue +++ b/monitor-ui/src/views/monitor-config/log-template-config/standard-regex.vue @@ -132,10 +132,10 @@ import Vue from 'vue' import {thresholdList, lastList} from '@/assets/config/common-config.js' const initRangeConfigMap = { - req_suc_rate: { - operator: '<', - threshold: '90', - time: '60', + req_fail_rate: { + operator: '>', + threshold: '10', + time: '0', time_unit: 's' }, req_costtime_avg: { @@ -285,7 +285,7 @@ export default { { if (!val) { - const key = ['req_suc_rate', 'req_costtime_avg'].includes(params.row.metric) ? params.row.metric : 'other' + const key = ['req_fail_rate', 'req_costtime_avg'].includes(params.row.metric) ? params.row.metric : 'other' Vue.set(this.configInfo.metric_list[params.index], 'range_config', cloneDeep(initRangeConfigMap[key])) } this.configInfo.metric_list[params.index].auto_alarm = val @@ -456,8 +456,8 @@ export default { 'code' ], color_group: '#20a162', - auto_alarm: true, - range_config: cloneDeep(initRangeConfigMap.req_suc_rate) + auto_alarm: false, + range_config: cloneDeep(initRangeConfigMap.other) }, { log_param_name: 'code', @@ -475,13 +475,13 @@ export default { log_param_name: 'code', metric: 'req_fail_rate', display_name: this.$t('m_failure_rate'), - agg_type: '100-100*{req_suc_count}/{req_count}', + agg_type: '100*{req_fail_count_detail}/{req_count}', tag_config: [ 'code' ], color_group: '#7c1823', - auto_alarm: false, - range_config: cloneDeep(initRangeConfigMap.other) + auto_alarm: true, + range_config: cloneDeep(initRangeConfigMap.req_fail_rate) }, { log_param_name: 'code',