From 3f8c16f82906cf0d568b54cb36f33229c14ee45d Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Mon, 24 Jun 2024 16:11:14 +0800 Subject: [PATCH 01/10] add rca index_ddl_error_scene --- handler/rca/scene/index_ddl_error_scene.py | 226 +++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 handler/rca/scene/index_ddl_error_scene.py diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py new file mode 100644 index 00000000..585fa47f --- /dev/null +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/06/01 +@file: index_ddl_error_scene.py +@desc: +""" +import re + +from handler.rca.rca_exception import RCAInitException, RCAExecuteException +from handler.rca.rca_handler import RcaScene, RCA_ResultRecord +from common.tool import StringUtils + + +class IndexDDLErrorScene(RcaScene): + def __init__(self): + super().__init__() + self.index_table_id = None + self.work_path = None + self.estimated_size = None + self.estimated_data_size = None + self.index_name = None + self.action_type = None + self.table_id = None + self.tenant_id = None + + def init(self, context): + super().init(context) + ## observer version>4.2.3.0 + observer_version = self.observer_version + if observer_version is None or len(observer_version.strip()) == 0: + raise RCAInitException("observer version is None. Please check the NODES conf.") + if not (observer_version == "4.2.3.0" or StringUtils.compare_versions_greater(observer_version, "4.2.3.0")): + self.stdio.error("observer version is {0}, which is less than 4.2.3.0.".format(observer_version)) + raise RCAInitException("observer version is {0}, which is less than 4.2.3.0.".format(observer_version)) + if self.ob_connector is None: + raise RCAInitException("ob_connector is None. Please check the NODES conf.") + self.verbose("observer version is {0}.".format(observer_version)) + # check table_name and tenant_name and database_name and index_name + table_name = self.input_parameters.get("table_name") + tenant_name = self.input_parameters.get("tenant_name") + action_type = self.input_parameters.get("action_type") + index_name = self.input_parameters.get("index_name") + database_name = self.input_parameters.get("database_name") + if table_name is None or table_name == "" or tenant_name is None or tenant_name == "" or index_name is None or index_name=="" or database_name is None or database_name=="": + raise RCAInitException("table_name or tenant_name or database_name or index_name is None. Please check the input parameters.") + + tenant_data = self.ob_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) + if len(tenant_data) == 0: + raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) + self.tenant_id = tenant_data[0][0] + self.verbose("tenant_id is {0}".format(self.tenant_id)) + if self.tenant_id is None: + raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) + database_id_data=self.ob_connector.execute_sql("select database_id from oceanbase.__all_database where database_name = '{0}';".format(database_name)) + if len(database_id_data) == 0: + raise RCAInitException("can not find database id by database name: {0}. Please check the table name.".format(database_name)) + self.database_id = database_id_data[0][0] + self.verbose("database_id is{0}".format(self.database_id)) + if self.database_id is None: + raise RCAInitException("can not find database id by tenant name: {0}. Please check the database name.".format(database_name)) + table_id_data = self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name='{0}' and tenant_id='{1}' and database_id='{2}';".format(table_name,self.tenant_id,self.database_id)) + if len(table_id_data) == 0: + raise RCAInitException("can not find table id by table name: {0}. Please check the table name.".format(table_name)) + self.table_id = table_id_data[0][0] + self.verbose("table_id is{0}".format(self.table_id)) + if self.table_id is None: + raise RCAInitException("can not find table id by table name: {0}. Please check the database name.".format(table_name)) + idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' ;".format(self.tenant_id,self.table_id,index_name)) + if len(idx_table_id_data) == 0: + raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) + self.index_table_id = idx_table_id_data[0][0] + self.verbose("index_table_id is{0}".format(self.index_table_id)) + if self.index_table_id is None: + raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) + self.verbose("tenant_id is {0},database_id is {1}, table_id is {2},index_table_id is {3}.".format(self.tenant_id,self.database_id,self.table_id,self.index_table_id)) + + def verbose(self, info): + self.stdio.verbose("[IndexDDLErrorScene] {0}".format(info)) + # self.stdio.print("[IndexDDLErrorScene] {0}".format(info)) + + def execute(self): + try: + record = RCA_ResultRecord() + record.add_record("tenant_id is {0}".format(self.tenant_id)) + record.add_record("database_id is {0}".format(self.database_id)) + record.add_record("table_id is {0}".format(self.table_id)) + record.add_record("index_table_id is {0}".format(self.index_table_id)) + record.add_record("index_name is {0}".format(self.input_parameters.get("index_name"))) + self.verbose("start to get trace_id and task_id...") + #trace_id + trace_id_data=self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id,self.index_table_id)) + self.verbose("trace_id_data is {0}".format(trace_id_data)) + if len(trace_id_data) == 0: + # raise RCAInitException("can not find trace id by index name: {0}. Please check the table name.".format(self.index_name)) + record.add_record("tenant_id is {0}".format(self.tenant_id)) + # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) + record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) + return + self.trace_id = trace_id_data[0][0] + self.verbose("trace_id is{0}".format(self.trace_id)) + if self.trace_id is None: + raise RCAInitException("can not find trace_id id by index name: {0}. Please check the index name.".format(self.index_name)) + #task_id + task_id_data=self.ob_connector.execute_sql("select task_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id,self.index_table_id)) + self.verbose("task_id_data is {0}".format(task_id_data)) + if task_id_data is None: + # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) + record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) + return + self.task_id = task_id_data[0][0] + self.verbose("task_id is {0}".format(self.task_id)) + + record.add_record("trace_id is {0}".format(self.trace_id)) + record.add_record("task_id is {0}".format(self.task_id)) + self.verbose("start to get event...") + #event_data=self.ob_connector.execute_sql("select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id)) + #新方法 + sql = "select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value1='{1}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id,self.tenant_id) + event_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall() + self.verbose("event_data is{0}".format(event_data)) + if event_data is None: + record.add_record("需根据trace_id去每个observer节点去过滤rootservice.log") + #收集RS日志 + # rs + self.verbose("event_data is None") + self.verbose("gather rootservice.log by {0}".format(self.trace_id)) + work_path_rs = self.store_dir +"/{0}_on_rs/".format(self.trace_id) + self.gather_log.set_parameters("scope", "rootservice") + self.gather_log.grep("{0}".format(self.trace_id)) + logs_name = self.gather_log.execute(save_path=work_path_rs) + if logs_name is None or len(logs_name) <= 0: + self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) + return False + record.add_record(" 日志保存位置:{0}".format(work_path_rs)) + record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + else: + # if event_data[0][0] is None: + # return record.add_suggest("需根据trace_id去每个observer节点去过滤rootservice.log") + # #收集RS日志 + record.add_record("event_data is {0}".format(event_data)) + # self.event=event_data[0][0] + self.event=event_data[0]["event"] + self.verbose("event is {0}".format(self.event)) + record.add_record("event is {0}".format(self.event)) + + #self.value6=event_data[0][1] + self.value6=event_data[0]["value6"] + self.verbose("value6 is {0}".format(self.value6)) + record.add_record("value6 is {0}".format(self.value6)) + ip_address = self.value6.split(":")[0].strip('"') + record.add_record("ip is {0}".format(ip_address)) + if self.event=='ddl wait trans end ctx try wait': + self.verbose("ok,event is ddl wait trans end ctx try wait") + record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + #收集日志 + #ddl_wait_trans_end_ctx_try_wait + self.verbose("__check_checkpoint") + work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" + self.gather_log.set_parameters("scope", "observer") + self.gather_log.grep("{0}".format(self.trace_id)) + logs_name = self.gather_log.execute(save_path=work_path_ddl_wait_trans_end_ctx_try_wait) + if logs_name is None or len(logs_name) <= 0: + self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) + return False + record.add_record(" 日志保存位置:{0}".format(work_path_ddl_wait_trans_end_ctx_try_wait)) + record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + + elif self.event=='index sstable build task finish': + self.verbose("ok,event is index sstable build task finish") + record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + #收集日志 + # index_sstable_build_task_finish + self.verbose("__check_checkpoint") + work_path_index_sstable_build_task_finish = self.store_dir + "/checkpoint/" + self.gather_log.set_parameters("scope", "observer") + self.gather_log.grep("{0}".format(self.trace_id)) + logs_name = self.gather_log.execute(save_path=work_path_index_sstable_build_task_finish) + if logs_name is None or len(logs_name) <= 0: + self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) + return False + record.add_record(" 日志保存位置:{0}".format(work_path_index_sstable_build_task_finish)) + record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + + else: + record.add_record("需根据trace_id去每个observer节点去过滤rootservice.log") + #收集RS日志 + # rs + self.verbose("__check_checkpoint") + work_path_rs = self.store_dir + "/checkpoint/" + self.gather_log.set_parameters("scope", "rootservice") + self.gather_log.grep("{0}".format(self.trace_id)) + logs_name = self.gather_log.execute(save_path=work_path_rs) + if logs_name is None or len(logs_name) <= 0: + self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) + return False + record.add_record(" 日志保存位置:{0}".format(work_path_rs)) + record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + + self.Result.records.append(record) + except Exception as e: + raise RCAExecuteException("IndexDDLErrorScene execute error: {0}".format(e)) + finally: + self.stdio.verbose("end IndexDDLErrorScene execute") + + def export_result(self): + super().export_result() + + def get_scene_info(self): + return { + "name": "index_ddl_error", + "info_en": "Troubleshooting errors in indexing execution. ", + "info_cn": '建索引执行报错问题排查', + } + +index_ddl_error = IndexDDLErrorScene() From 981bf70604bfb16490e81a33a8c2963295e0be16 Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Mon, 24 Jun 2024 16:26:09 +0800 Subject: [PATCH 02/10] update0624 --- handler/rca/scene/index_ddl_error_scene.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index 585fa47f..be8ad7a1 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -11,7 +11,7 @@ # See the Mulan PSL v2 for more details. """ -@time: 2024/06/01 +@time: 2024/06/02 @file: index_ddl_error_scene.py @desc: """ From 57e9cfeead507b2b04485e80d3850a65753ec91d Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Mon, 24 Jun 2024 16:43:37 +0800 Subject: [PATCH 03/10] 1111 --- handler/rca/scene/index_ddl_error_scene.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index be8ad7a1..f5e0f134 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -11,7 +11,7 @@ # See the Mulan PSL v2 for more details. """ -@time: 2024/06/02 +@time: 2024/06/03 @file: index_ddl_error_scene.py @desc: """ From a354d05582367d6a90f18149930f22d7aa328394 Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Tue, 2 Jul 2024 10:30:12 +0800 Subject: [PATCH 04/10] update0702 --- handler/rca/scene/index_ddl_error_scene.py | 67 ++++++++++++---------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index f5e0f134..f03fd233 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -11,7 +11,7 @@ # See the Mulan PSL v2 for more details. """ -@time: 2024/06/03 +@time: 2024/06/04 @file: index_ddl_error_scene.py @desc: """ @@ -76,7 +76,9 @@ def init(self, context): self.verbose("table_id is{0}".format(self.table_id)) if self.table_id is None: raise RCAInitException("can not find table id by table name: {0}. Please check the database name.".format(table_name)) - idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' ;".format(self.tenant_id,self.table_id,index_name)) + # idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' ;".format(self.tenant_id,self.table_id,index_name)) + idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' order by gmt_create desc limit 1;".format(self.tenant_id,self.table_id,index_name)) + if len(idx_table_id_data) == 0: raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) self.index_table_id = idx_table_id_data[0][0] @@ -102,8 +104,6 @@ def execute(self): trace_id_data=self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id,self.index_table_id)) self.verbose("trace_id_data is {0}".format(trace_id_data)) if len(trace_id_data) == 0: - # raise RCAInitException("can not find trace id by index name: {0}. Please check the table name.".format(self.index_name)) - record.add_record("tenant_id is {0}".format(self.tenant_id)) # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) return @@ -130,7 +130,7 @@ def execute(self): event_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall() self.verbose("event_data is{0}".format(event_data)) if event_data is None: - record.add_record("需根据trace_id去每个observer节点去过滤rootservice.log") + record.add_record("gather rootservice.log by {0}".format(self.trace_id)) #收集RS日志 # rs self.verbose("event_data is None") @@ -142,70 +142,75 @@ def execute(self): if logs_name is None or len(logs_name) <= 0: self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False - record.add_record(" 日志保存位置:{0}".format(work_path_rs)) - record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_record("Log saving location:{0}".format(work_path_rs)) + # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) else: - # if event_data[0][0] is None: - # return record.add_suggest("需根据trace_id去每个observer节点去过滤rootservice.log") - # #收集RS日志 record.add_record("event_data is {0}".format(event_data)) - # self.event=event_data[0][0] self.event=event_data[0]["event"] self.verbose("event is {0}".format(self.event)) record.add_record("event is {0}".format(self.event)) - - #self.value6=event_data[0][1] self.value6=event_data[0]["value6"] - self.verbose("value6 is {0}".format(self.value6)) - record.add_record("value6 is {0}".format(self.value6)) - ip_address = self.value6.split(":")[0].strip('"') + self.inner_sql_execute_addr=self.value6 + self.verbose("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) + record.add_record("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) + ip_address = self.inner_sql_execute_addr.split(":")[0].strip('"') record.add_record("ip is {0}".format(ip_address)) if self.event=='ddl wait trans end ctx try wait': self.verbose("ok,event is ddl wait trans end ctx try wait") - record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + # record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + record.add_record("event is {0},The failure of index creation occurred during the transaction end phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event,self.trace_id)) #收集日志 #ddl_wait_trans_end_ctx_try_wait - self.verbose("__check_checkpoint") - work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" + self.verbose("gather observer.log by {0}".format(self.trace_id)) + work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir +"/{0}_on_obs/".format(self.trace_id) + # work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_ddl_wait_trans_end_ctx_try_wait) if logs_name is None or len(logs_name) <= 0: self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False - record.add_record(" 日志保存位置:{0}".format(work_path_ddl_wait_trans_end_ctx_try_wait)) - record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_record(" Log saving location:{0}".format(work_path_ddl_wait_trans_end_ctx_try_wait)) + # record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_suggest("The failure of index creation occurred during the transaction completion phase. Please upload {0} to the OceanBase community".format(self.store_dir)) elif self.event=='index sstable build task finish': self.verbose("ok,event is index sstable build task finish") - record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + # record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) + record.add_record("event is {0},The failure of index creation occurred during the data replenishment phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event,self.trace_id)) + self.verbose("gather observer.log by {0}".format(self.trace_id)) #收集日志 # index_sstable_build_task_finish - self.verbose("__check_checkpoint") - work_path_index_sstable_build_task_finish = self.store_dir + "/checkpoint/" + # self.verbose("__check_checkpoint") + work_path_index_sstable_build_task_finish = self.store_dir +"/{0}_on_obs/".format(self.trace_id) self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_index_sstable_build_task_finish) if logs_name is None or len(logs_name) <= 0: self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False - record.add_record(" 日志保存位置:{0}".format(work_path_index_sstable_build_task_finish)) - record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_record(" Log saving location:{0}".format(work_path_index_sstable_build_task_finish)) + # record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_suggest("The index creation failed during the data replenishment phase. Please upload {0} to the OceanBase community".format(self.store_dir)) + else: - record.add_record("需根据trace_id去每个observer节点去过滤rootservice.log") + record.add_record("gather rootservice.log by {0}".format(self.trace_id)) #收集RS日志 # rs - self.verbose("__check_checkpoint") - work_path_rs = self.store_dir + "/checkpoint/" + self.verbose("event_data is None") + self.verbose("gather rootservice.log by {0}".format(self.trace_id)) + work_path_rs = self.store_dir +"/{0}_on_rs/".format(self.trace_id) self.gather_log.set_parameters("scope", "rootservice") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_rs) if logs_name is None or len(logs_name) <= 0: self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False - record.add_record(" 日志保存位置:{0}".format(work_path_rs)) - record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_record("Log saving location:{0}".format(work_path_rs)) + # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) self.Result.records.append(record) except Exception as e: From 6a1ebef396289bd329688bcedccecb7596a8fec5 Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Tue, 2 Jul 2024 10:31:24 +0800 Subject: [PATCH 05/10] update0702 --- handler/rca/scene/lock_conflict_scene.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index 461df350..c2de51be 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -15,9 +15,10 @@ @file: lock_conflict_scene.py @desc: """ +import json from handler.rca.rca_exception import RCAInitException, RCANotNeedExecuteException from handler.rca.rca_handler import RcaScene, RCA_ResultRecord -from common.tool import StringUtils +from common.tool import StringUtils, DateTimeEncoder class LockConflictScene(RcaScene): @@ -36,7 +37,7 @@ def init(self, context): def execute(self): if self.observer_version == "4.2.0.0" or StringUtils.compare_versions_greater(self.observer_version, "4.2.0.0"): self.__execute_4_2() - elif StringUtils.compare_versions_greater("4.2.2.0", self.observer_version): + elif StringUtils.compare_versions_greater("4.2.0.0", self.observer_version): self.__execute_old() else: raise Exception("observer version is {0}. Not support".format(self.observer_version)) @@ -67,7 +68,7 @@ def __execute_4_2(self): trans_record.add_record("get holding_lock trans_id:{0}".format(trans_id)) holding_lock_session_id = trans_id self.stdio.verbose("get holding lock SESSION_ID by trans_id:{0}".format(trans_id)) - cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.V$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(holding_lock_session_id)) + cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(holding_lock_session_id)) holding_lock_session_id_datas = cursor_by_trans_id.fetchall() holding_lock_session_id = "not get" self.stdio.verbose("get sql_info by holding_lock_session_id:{0}".format(holding_lock_session_id_datas)) @@ -81,7 +82,7 @@ def __execute_4_2(self): wait_lock_trans_id = OB_LOCKS_data["TRANS_ID"] trans_record.add_record("wait_lock_trans_id is {0}".format(wait_lock_trans_id)) - cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.V$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(wait_lock_trans_id)) + cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(wait_lock_trans_id)) wait_lock_session_datas = cursor_by_trans_id.fetchall() self.stdio.verbose("get sql_info by holding_lock_session_id:{0}".format(holding_lock_session_id)) @@ -100,10 +101,10 @@ def __execute_4_2(self): cursor_check_switch = self.ob_connector.execute_sql_return_cursor_dictionary("SHOW PARAMETERS LIKE '%enable_sql_audit%';") audit_switch_value = cursor_check_switch.fetchone().get("value") if audit_switch_value.strip().upper() == "TRUE": - holding_lock_sql_info_cursor = self.ob_connector.execute_sql_return_cursor_dictionary('SELECT * FROM oceanbase.v$OB_SQL_AUDIT where SID="{0}";'.format(holding_lock_session_id)) + holding_lock_sql_info_cursor = self.ob_connector.execute_sql_return_cursor_dictionary('SELECT * FROM oceanbase.gv$OB_SQL_AUDIT where SID="{0}";'.format(holding_lock_session_id)) holding_lock_sql_info = holding_lock_sql_info_cursor.fetchall() if len(holding_lock_sql_info) == 0: - trans_record.add_record("holding_lock_session_id: {0}; not find sql_info on v$OB_SQL_AUDIT".format(holding_lock_session_id)) + trans_record.add_record("holding_lock_session_id: {0}; not find sql_info on gv$OB_SQL_AUDIT".format(holding_lock_session_id)) else: holding_lock_sql_info_json_data = json.dumps(holding_lock_sql_info, cls=DateTimeEncoder) file_name = "{0}/rca_holding_lock_sql_info_{1}.json".format(self.local_path, holding_lock_session_id) From fdc4836cc296f4fc73c90b890977aa2963cda230 Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Thu, 4 Jul 2024 14:41:32 +0800 Subject: [PATCH 06/10] update0704 --- handler/rca/scene/index_ddl_error_scene.py | 94 +++++++++++----------- handler/rca/scene/lock_conflict_scene.py | 11 ++- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index f03fd233..e4a458e7 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -33,7 +33,7 @@ def __init__(self): self.action_type = None self.table_id = None self.tenant_id = None - + def init(self, context): super().init(context) ## observer version>4.2.3.0 @@ -46,13 +46,13 @@ def init(self, context): if self.ob_connector is None: raise RCAInitException("ob_connector is None. Please check the NODES conf.") self.verbose("observer version is {0}.".format(observer_version)) - # check table_name and tenant_name and database_name and index_name + # check table_name and tenant_name and database_name and index_name table_name = self.input_parameters.get("table_name") tenant_name = self.input_parameters.get("tenant_name") action_type = self.input_parameters.get("action_type") index_name = self.input_parameters.get("index_name") database_name = self.input_parameters.get("database_name") - if table_name is None or table_name == "" or tenant_name is None or tenant_name == "" or index_name is None or index_name=="" or database_name is None or database_name=="": + if table_name is None or table_name == "" or tenant_name is None or tenant_name == "" or index_name is None or index_name == "" or database_name is None or database_name == "": raise RCAInitException("table_name or tenant_name or database_name or index_name is None. Please check the input parameters.") tenant_data = self.ob_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) @@ -62,14 +62,14 @@ def init(self, context): self.verbose("tenant_id is {0}".format(self.tenant_id)) if self.tenant_id is None: raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) - database_id_data=self.ob_connector.execute_sql("select database_id from oceanbase.__all_database where database_name = '{0}';".format(database_name)) + database_id_data = self.ob_connector.execute_sql("select database_id from oceanbase.__all_database where database_name = '{0}';".format(database_name)) if len(database_id_data) == 0: raise RCAInitException("can not find database id by database name: {0}. Please check the table name.".format(database_name)) self.database_id = database_id_data[0][0] self.verbose("database_id is{0}".format(self.database_id)) if self.database_id is None: raise RCAInitException("can not find database id by tenant name: {0}. Please check the database name.".format(database_name)) - table_id_data = self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name='{0}' and tenant_id='{1}' and database_id='{2}';".format(table_name,self.tenant_id,self.database_id)) + table_id_data = self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name='{0}' and tenant_id='{1}' and database_id='{2}';".format(table_name, self.tenant_id, self.database_id)) if len(table_id_data) == 0: raise RCAInitException("can not find table id by table name: {0}. Please check the table name.".format(table_name)) self.table_id = table_id_data[0][0] @@ -77,7 +77,9 @@ def init(self, context): if self.table_id is None: raise RCAInitException("can not find table id by table name: {0}. Please check the database name.".format(table_name)) # idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' ;".format(self.tenant_id,self.table_id,index_name)) - idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' order by gmt_create desc limit 1;".format(self.tenant_id,self.table_id,index_name)) + idx_table_id_data = self.ob_connector.execute_sql( + "select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' order by gmt_create desc limit 1;".format(self.tenant_id, self.table_id, index_name) + ) if len(idx_table_id_data) == 0: raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) @@ -85,7 +87,7 @@ def init(self, context): self.verbose("index_table_id is{0}".format(self.index_table_id)) if self.index_table_id is None: raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) - self.verbose("tenant_id is {0},database_id is {1}, table_id is {2},index_table_id is {3}.".format(self.tenant_id,self.database_id,self.table_id,self.index_table_id)) + self.verbose("tenant_id is {0},database_id is {1}, table_id is {2},index_table_id is {3}.".format(self.tenant_id, self.database_id, self.table_id, self.index_table_id)) def verbose(self, info): self.stdio.verbose("[IndexDDLErrorScene] {0}".format(info)) @@ -100,19 +102,19 @@ def execute(self): record.add_record("index_table_id is {0}".format(self.index_table_id)) record.add_record("index_name is {0}".format(self.input_parameters.get("index_name"))) self.verbose("start to get trace_id and task_id...") - #trace_id - trace_id_data=self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id,self.index_table_id)) + # trace_id + trace_id_data = self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) self.verbose("trace_id_data is {0}".format(trace_id_data)) if len(trace_id_data) == 0: # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) - return + return self.trace_id = trace_id_data[0][0] self.verbose("trace_id is{0}".format(self.trace_id)) if self.trace_id is None: raise RCAInitException("can not find trace_id id by index name: {0}. Please check the index name.".format(self.index_name)) - #task_id - task_id_data=self.ob_connector.execute_sql("select task_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id,self.index_table_id)) + # task_id + task_id_data = self.ob_connector.execute_sql("select task_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) self.verbose("task_id_data is {0}".format(task_id_data)) if task_id_data is None: # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) @@ -124,46 +126,48 @@ def execute(self): record.add_record("trace_id is {0}".format(self.trace_id)) record.add_record("task_id is {0}".format(self.task_id)) self.verbose("start to get event...") - #event_data=self.ob_connector.execute_sql("select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id)) - #新方法 - sql = "select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value1='{1}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id,self.tenant_id) + # event_data=self.ob_connector.execute_sql("select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id)) + # 新方法 + sql = "select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value1='{1}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format( + self.task_id, self.tenant_id + ) event_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall() self.verbose("event_data is{0}".format(event_data)) if event_data is None: - record.add_record("gather rootservice.log by {0}".format(self.trace_id)) - #收集RS日志 - # rs - self.verbose("event_data is None") - self.verbose("gather rootservice.log by {0}".format(self.trace_id)) - work_path_rs = self.store_dir +"/{0}_on_rs/".format(self.trace_id) - self.gather_log.set_parameters("scope", "rootservice") - self.gather_log.grep("{0}".format(self.trace_id)) - logs_name = self.gather_log.execute(save_path=work_path_rs) - if logs_name is None or len(logs_name) <= 0: - self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) - return False - record.add_record("Log saving location:{0}".format(work_path_rs)) - # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) - record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) + record.add_record("gather rootservice.log by {0}".format(self.trace_id)) + # 收集RS日志 + # rs + self.verbose("event_data is None") + self.verbose("gather rootservice.log by {0}".format(self.trace_id)) + work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) + self.gather_log.set_parameters("scope", "rootservice") + self.gather_log.grep("{0}".format(self.trace_id)) + logs_name = self.gather_log.execute(save_path=work_path_rs) + if logs_name is None or len(logs_name) <= 0: + self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) + return False + record.add_record("Log saving location:{0}".format(work_path_rs)) + # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) + record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) else: record.add_record("event_data is {0}".format(event_data)) - self.event=event_data[0]["event"] + self.event = event_data[0]["event"] self.verbose("event is {0}".format(self.event)) record.add_record("event is {0}".format(self.event)) - self.value6=event_data[0]["value6"] - self.inner_sql_execute_addr=self.value6 + self.value6 = event_data[0]["value6"] + self.inner_sql_execute_addr = self.value6 self.verbose("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) record.add_record("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) ip_address = self.inner_sql_execute_addr.split(":")[0].strip('"') record.add_record("ip is {0}".format(ip_address)) - if self.event=='ddl wait trans end ctx try wait': + if self.event == 'ddl wait trans end ctx try wait': self.verbose("ok,event is ddl wait trans end ctx try wait") # record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) - record.add_record("event is {0},The failure of index creation occurred during the transaction end phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event,self.trace_id)) - #收集日志 - #ddl_wait_trans_end_ctx_try_wait + record.add_record("event is {0},The failure of index creation occurred during the transaction end phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) + # 收集日志 + # ddl_wait_trans_end_ctx_try_wait self.verbose("gather observer.log by {0}".format(self.trace_id)) - work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir +"/{0}_on_obs/".format(self.trace_id) + work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/{0}_on_obs/".format(self.trace_id) # work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) @@ -175,15 +179,15 @@ def execute(self): # record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The failure of index creation occurred during the transaction completion phase. Please upload {0} to the OceanBase community".format(self.store_dir)) - elif self.event=='index sstable build task finish': + elif self.event == 'index sstable build task finish': self.verbose("ok,event is index sstable build task finish") # record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) - record.add_record("event is {0},The failure of index creation occurred during the data replenishment phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event,self.trace_id)) + record.add_record("event is {0},The failure of index creation occurred during the data replenishment phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) self.verbose("gather observer.log by {0}".format(self.trace_id)) - #收集日志 + # 收集日志 # index_sstable_build_task_finish # self.verbose("__check_checkpoint") - work_path_index_sstable_build_task_finish = self.store_dir +"/{0}_on_obs/".format(self.trace_id) + work_path_index_sstable_build_task_finish = self.store_dir + "/{0}_on_obs/".format(self.trace_id) self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_index_sstable_build_task_finish) @@ -194,14 +198,13 @@ def execute(self): # record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The index creation failed during the data replenishment phase. Please upload {0} to the OceanBase community".format(self.store_dir)) - else: record.add_record("gather rootservice.log by {0}".format(self.trace_id)) - #收集RS日志 + # 收集RS日志 # rs self.verbose("event_data is None") self.verbose("gather rootservice.log by {0}".format(self.trace_id)) - work_path_rs = self.store_dir +"/{0}_on_rs/".format(self.trace_id) + work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) self.gather_log.set_parameters("scope", "rootservice") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_rs) @@ -228,4 +231,5 @@ def get_scene_info(self): "info_cn": '建索引执行报错问题排查', } + index_ddl_error = IndexDDLErrorScene() diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index c2de51be..71b622c5 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -24,8 +24,15 @@ class LockConflictScene(RcaScene): def __init__(self): super().__init__() - def init(self, context): + tenant_name = self.input_parameters.get("tenant_name") + if tenant_name is None or tenant_name=="" : + raise RCAInitException("tenant_name is None. Please check the input parameters.") + tenant_data = self.ob_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) + if len(tenant_data) == 0: + raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) + self.tenant_id = tenant_data[0][0] + self.verbose("tenant_id is {0}".format(self.tenant_id)) try: super().init(context) self.local_path = context.get_variable("store_dir") @@ -45,7 +52,7 @@ def execute(self): def __execute_4_2(self): first_record = RCA_ResultRecord() # get trans_id - cursor = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_LOCKS where BLOCK=1 and TYPE="TX" limit 50;') + cursor = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_LOCKS where BLOCK=1 and TYPE="TX" and tenant_id="{0}" limit 50;'.format(self.tenant_id)) data = cursor.fetchall() if len(data) == 0: first_record.add_record("on GV$OB_LOCKS result is null") From d6b085658152a1110f392f4516f554f53c70bc19 Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Thu, 4 Jul 2024 15:02:41 +0800 Subject: [PATCH 07/10] update0704 --- handler/rca/scene/lock_conflict_scene.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index 71b622c5..1824e4d0 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -18,21 +18,14 @@ import json from handler.rca.rca_exception import RCAInitException, RCANotNeedExecuteException from handler.rca.rca_handler import RcaScene, RCA_ResultRecord -from common.tool import StringUtils, DateTimeEncoder +from common.tool import StringUtils,DateTimeEncoder class LockConflictScene(RcaScene): def __init__(self): super().__init__() + def init(self, context): - tenant_name = self.input_parameters.get("tenant_name") - if tenant_name is None or tenant_name=="" : - raise RCAInitException("tenant_name is None. Please check the input parameters.") - tenant_data = self.ob_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) - if len(tenant_data) == 0: - raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) - self.tenant_id = tenant_data[0][0] - self.verbose("tenant_id is {0}".format(self.tenant_id)) try: super().init(context) self.local_path = context.get_variable("store_dir") @@ -52,7 +45,7 @@ def execute(self): def __execute_4_2(self): first_record = RCA_ResultRecord() # get trans_id - cursor = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_LOCKS where BLOCK=1 and TYPE="TX" and tenant_id="{0}" limit 50;'.format(self.tenant_id)) + cursor = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_LOCKS where BLOCK=1 and TYPE="TX" limit 50;') data = cursor.fetchall() if len(data) == 0: first_record.add_record("on GV$OB_LOCKS result is null") @@ -168,4 +161,4 @@ def get_scene_info(self): } -lock_conflict = LockConflictScene() +lock_conflict = LockConflictScene() \ No newline at end of file From 5cb0d4cb00542bacaa0518ef366f4ae5b005afbd Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Fri, 5 Jul 2024 14:15:20 +0800 Subject: [PATCH 08/10] update0705 --- handler/rca/scene/lock_conflict_scene.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index 1824e4d0..c2de51be 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -18,7 +18,7 @@ import json from handler.rca.rca_exception import RCAInitException, RCANotNeedExecuteException from handler.rca.rca_handler import RcaScene, RCA_ResultRecord -from common.tool import StringUtils,DateTimeEncoder +from common.tool import StringUtils, DateTimeEncoder class LockConflictScene(RcaScene): @@ -161,4 +161,4 @@ def get_scene_info(self): } -lock_conflict = LockConflictScene() \ No newline at end of file +lock_conflict = LockConflictScene() From f52323be5bea412ad6d2f4edfe73531f7419804c Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Tue, 9 Jul 2024 12:26:46 +0800 Subject: [PATCH 09/10] update0709 --- handler/rca/scene/index_ddl_error_scene.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index e4a458e7..3910587b 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -91,8 +91,6 @@ def init(self, context): def verbose(self, info): self.stdio.verbose("[IndexDDLErrorScene] {0}".format(info)) - # self.stdio.print("[IndexDDLErrorScene] {0}".format(info)) - def execute(self): try: record = RCA_ResultRecord() @@ -106,7 +104,6 @@ def execute(self): trace_id_data = self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) self.verbose("trace_id_data is {0}".format(trace_id_data)) if len(trace_id_data) == 0: - # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) return self.trace_id = trace_id_data[0][0] @@ -117,7 +114,6 @@ def execute(self): task_id_data = self.ob_connector.execute_sql("select task_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) self.verbose("task_id_data is {0}".format(task_id_data)) if task_id_data is None: - # record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) return self.task_id = task_id_data[0][0] @@ -127,7 +123,6 @@ def execute(self): record.add_record("task_id is {0}".format(self.task_id)) self.verbose("start to get event...") # event_data=self.ob_connector.execute_sql("select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id)) - # 新方法 sql = "select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value1='{1}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format( self.task_id, self.tenant_id ) @@ -135,7 +130,6 @@ def execute(self): self.verbose("event_data is{0}".format(event_data)) if event_data is None: record.add_record("gather rootservice.log by {0}".format(self.trace_id)) - # 收集RS日志 # rs self.verbose("event_data is None") self.verbose("gather rootservice.log by {0}".format(self.trace_id)) @@ -147,7 +141,6 @@ def execute(self): self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False record.add_record("Log saving location:{0}".format(work_path_rs)) - # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) else: record.add_record("event_data is {0}".format(event_data)) @@ -162,13 +155,10 @@ def execute(self): record.add_record("ip is {0}".format(ip_address)) if self.event == 'ddl wait trans end ctx try wait': self.verbose("ok,event is ddl wait trans end ctx try wait") - # record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) record.add_record("event is {0},The failure of index creation occurred during the transaction end phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) - # 收集日志 # ddl_wait_trans_end_ctx_try_wait self.verbose("gather observer.log by {0}".format(self.trace_id)) work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/{0}_on_obs/".format(self.trace_id) - # work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_ddl_wait_trans_end_ctx_try_wait) @@ -176,17 +166,13 @@ def execute(self): self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False record.add_record(" Log saving location:{0}".format(work_path_ddl_wait_trans_end_ctx_try_wait)) - # record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The failure of index creation occurred during the transaction completion phase. Please upload {0} to the OceanBase community".format(self.store_dir)) elif self.event == 'index sstable build task finish': self.verbose("ok,event is index sstable build task finish") - # record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) record.add_record("event is {0},The failure of index creation occurred during the data replenishment phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) self.verbose("gather observer.log by {0}".format(self.trace_id)) - # 收集日志 # index_sstable_build_task_finish - # self.verbose("__check_checkpoint") work_path_index_sstable_build_task_finish = self.store_dir + "/{0}_on_obs/".format(self.trace_id) self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.trace_id)) @@ -195,12 +181,10 @@ def execute(self): self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False record.add_record(" Log saving location:{0}".format(work_path_index_sstable_build_task_finish)) - # record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The index creation failed during the data replenishment phase. Please upload {0} to the OceanBase community".format(self.store_dir)) else: record.add_record("gather rootservice.log by {0}".format(self.trace_id)) - # 收集RS日志 # rs self.verbose("event_data is None") self.verbose("gather rootservice.log by {0}".format(self.trace_id)) @@ -212,7 +196,6 @@ def execute(self): self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) return False record.add_record("Log saving location:{0}".format(work_path_rs)) - # record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) self.Result.records.append(record) From 4a4ba3751c7b91b62acd39d795a1facb334d382b Mon Sep 17 00:00:00 2001 From: jingyd66 <1071948456@qq.com> Date: Tue, 9 Jul 2024 17:58:53 +0800 Subject: [PATCH 10/10] 0709 --- handler/rca/scene/index_ddl_error_scene.py | 1 + 1 file changed, 1 insertion(+) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index 3910587b..0d28ff5a 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -91,6 +91,7 @@ def init(self, context): def verbose(self, info): self.stdio.verbose("[IndexDDLErrorScene] {0}".format(info)) + def execute(self): try: record = RCA_ResultRecord()