-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add rca index_ddl_error_scene #278
Merged
Merged
Changes from 8 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
3f8c16f
add rca index_ddl_error_scene
jingyd66 981bf70
update0624
jingyd66 57e9cfe
1111
jingyd66 a354d05
update0702
jingyd66 6a1ebef
update0702
jingyd66 fdc4836
update0704
jingyd66 d6b0856
update0704
jingyd66 5cb0d4c
update0705
jingyd66 f52323b
update0709
jingyd66 4a4ba37
0709
jingyd66 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: UTF-8 -* | ||
# Copyright (c) 2022 OceanBase | ||
# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. | ||
# You can use this software according to the terms and conditions of the Mulan PSL v2. | ||
# You may obtain a copy of Mulan PSL v2 at: | ||
# http://license.coscl.org.cn/MulanPSL2 | ||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, | ||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, | ||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. | ||
# See the Mulan PSL v2 for more details. | ||
|
||
""" | ||
@time: 2024/06/04 | ||
@file: index_ddl_error_scene.py | ||
@desc: | ||
""" | ||
import re | ||
|
||
from handler.rca.rca_exception import RCAInitException, RCAExecuteException | ||
from handler.rca.rca_handler import RcaScene, RCA_ResultRecord | ||
from common.tool import StringUtils | ||
|
||
|
||
class IndexDDLErrorScene(RcaScene): | ||
def __init__(self): | ||
super().__init__() | ||
self.index_table_id = None | ||
self.work_path = None | ||
self.estimated_size = None | ||
self.estimated_data_size = None | ||
self.index_name = None | ||
self.action_type = None | ||
self.table_id = None | ||
self.tenant_id = None | ||
|
||
def init(self, context): | ||
super().init(context) | ||
## observer version>4.2.3.0 | ||
observer_version = self.observer_version | ||
if observer_version is None or len(observer_version.strip()) == 0: | ||
raise RCAInitException("observer version is None. Please check the NODES conf.") | ||
if not (observer_version == "4.2.3.0" or StringUtils.compare_versions_greater(observer_version, "4.2.3.0")): | ||
self.stdio.error("observer version is {0}, which is less than 4.2.3.0.".format(observer_version)) | ||
raise RCAInitException("observer version is {0}, which is less than 4.2.3.0.".format(observer_version)) | ||
if self.ob_connector is None: | ||
raise RCAInitException("ob_connector is None. Please check the NODES conf.") | ||
self.verbose("observer version is {0}.".format(observer_version)) | ||
# check table_name and tenant_name and database_name and index_name | ||
table_name = self.input_parameters.get("table_name") | ||
tenant_name = self.input_parameters.get("tenant_name") | ||
action_type = self.input_parameters.get("action_type") | ||
index_name = self.input_parameters.get("index_name") | ||
database_name = self.input_parameters.get("database_name") | ||
if table_name is None or table_name == "" or tenant_name is None or tenant_name == "" or index_name is None or index_name == "" or database_name is None or database_name == "": | ||
raise RCAInitException("table_name or tenant_name or database_name or index_name is None. Please check the input parameters.") | ||
|
||
tenant_data = self.ob_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) | ||
if len(tenant_data) == 0: | ||
raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) | ||
self.tenant_id = tenant_data[0][0] | ||
self.verbose("tenant_id is {0}".format(self.tenant_id)) | ||
if self.tenant_id is None: | ||
raise RCAInitException("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) | ||
database_id_data = self.ob_connector.execute_sql("select database_id from oceanbase.__all_database where database_name = '{0}';".format(database_name)) | ||
if len(database_id_data) == 0: | ||
raise RCAInitException("can not find database id by database name: {0}. Please check the table name.".format(database_name)) | ||
self.database_id = database_id_data[0][0] | ||
self.verbose("database_id is{0}".format(self.database_id)) | ||
if self.database_id is None: | ||
raise RCAInitException("can not find database id by tenant name: {0}. Please check the database name.".format(database_name)) | ||
table_id_data = self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name='{0}' and tenant_id='{1}' and database_id='{2}';".format(table_name, self.tenant_id, self.database_id)) | ||
if len(table_id_data) == 0: | ||
raise RCAInitException("can not find table id by table name: {0}. Please check the table name.".format(table_name)) | ||
self.table_id = table_id_data[0][0] | ||
self.verbose("table_id is{0}".format(self.table_id)) | ||
if self.table_id is None: | ||
raise RCAInitException("can not find table id by table name: {0}. Please check the database name.".format(table_name)) | ||
# idx_table_id_data=self.ob_connector.execute_sql("select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' ;".format(self.tenant_id,self.table_id,index_name)) | ||
idx_table_id_data = self.ob_connector.execute_sql( | ||
"select table_id from oceanbase.__all_virtual_table_history where tenant_id ='{0}' and data_table_id='{1}' and table_name like '%{2}%' order by gmt_create desc limit 1;".format(self.tenant_id, self.table_id, index_name) | ||
) | ||
|
||
if len(idx_table_id_data) == 0: | ||
raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) | ||
self.index_table_id = idx_table_id_data[0][0] | ||
self.verbose("index_table_id is{0}".format(self.index_table_id)) | ||
if self.index_table_id is None: | ||
raise RCAInitException("can not find index table id by table name: {0}. Please check the index name.".format(index_name)) | ||
self.verbose("tenant_id is {0},database_id is {1}, table_id is {2},index_table_id is {3}.".format(self.tenant_id, self.database_id, self.table_id, self.index_table_id)) | ||
|
||
def verbose(self, info): | ||
self.stdio.verbose("[IndexDDLErrorScene] {0}".format(info)) | ||
# self.stdio.print("[IndexDDLErrorScene] {0}".format(info)) | ||
|
||
def execute(self): | ||
try: | ||
record = RCA_ResultRecord() | ||
record.add_record("tenant_id is {0}".format(self.tenant_id)) | ||
record.add_record("database_id is {0}".format(self.database_id)) | ||
record.add_record("table_id is {0}".format(self.table_id)) | ||
record.add_record("index_table_id is {0}".format(self.index_table_id)) | ||
record.add_record("index_name is {0}".format(self.input_parameters.get("index_name"))) | ||
self.verbose("start to get trace_id and task_id...") | ||
# trace_id | ||
trace_id_data = self.ob_connector.execute_sql("select trace_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) | ||
self.verbose("trace_id_data is {0}".format(trace_id_data)) | ||
if len(trace_id_data) == 0: | ||
# record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) | ||
record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) | ||
return | ||
self.trace_id = trace_id_data[0][0] | ||
self.verbose("trace_id is{0}".format(self.trace_id)) | ||
if self.trace_id is None: | ||
raise RCAInitException("can not find trace_id id by index name: {0}. Please check the index name.".format(self.index_name)) | ||
# task_id | ||
task_id_data = self.ob_connector.execute_sql("select task_id from oceanbase.__all_virtual_ddl_error_message where tenant_id = '{0}' and object_id='{1}';".format(self.tenant_id, self.index_table_id)) | ||
self.verbose("task_id_data is {0}".format(task_id_data)) | ||
if task_id_data is None: | ||
# record.add_suggest("创建索引失败发生在发送RPC阶段。此时需要人工介入排查,请把该文件包上传到OcenBase社区{0}".format(self.store_dir)) | ||
record.add_suggest("The index creation failure occurs during the RPC sending phase. Manual intervention is required to troubleshoot this issue. Please upload the package to the OcenBase community{0}".format(self.store_dir)) | ||
return | ||
self.task_id = task_id_data[0][0] | ||
self.verbose("task_id is {0}".format(self.task_id)) | ||
|
||
record.add_record("trace_id is {0}".format(self.trace_id)) | ||
record.add_record("task_id is {0}".format(self.task_id)) | ||
self.verbose("start to get event...") | ||
# event_data=self.ob_connector.execute_sql("select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format(self.task_id)) | ||
# 新方法 | ||
sql = "select event, value6,rs_svr_ip, rs_svr_port from oceanbase.__all_rootservice_event_history where value4 = '{0}' and value1='{1}' and value2 != 0 and event != 'switch_state' and event not like 'index build task process fail' order by gmt_create desc limit 1;".format( | ||
self.task_id, self.tenant_id | ||
) | ||
event_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall() | ||
self.verbose("event_data is{0}".format(event_data)) | ||
if event_data is None: | ||
record.add_record("gather rootservice.log by {0}".format(self.trace_id)) | ||
# 收集RS日志 | ||
# rs | ||
self.verbose("event_data is None") | ||
self.verbose("gather rootservice.log by {0}".format(self.trace_id)) | ||
work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) | ||
self.gather_log.set_parameters("scope", "rootservice") | ||
self.gather_log.grep("{0}".format(self.trace_id)) | ||
logs_name = self.gather_log.execute(save_path=work_path_rs) | ||
if logs_name is None or len(logs_name) <= 0: | ||
self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) | ||
return False | ||
record.add_record("Log saving location:{0}".format(work_path_rs)) | ||
# record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) | ||
record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) | ||
else: | ||
record.add_record("event_data is {0}".format(event_data)) | ||
self.event = event_data[0]["event"] | ||
self.verbose("event is {0}".format(self.event)) | ||
record.add_record("event is {0}".format(self.event)) | ||
self.value6 = event_data[0]["value6"] | ||
self.inner_sql_execute_addr = self.value6 | ||
self.verbose("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) | ||
record.add_record("inner_sql_execute_addr is {0}".format(self.inner_sql_execute_addr)) | ||
ip_address = self.inner_sql_execute_addr.split(":")[0].strip('"') | ||
record.add_record("ip is {0}".format(ip_address)) | ||
if self.event == 'ddl wait trans end ctx try wait': | ||
self.verbose("ok,event is ddl wait trans end ctx try wait") | ||
# record.add_record("event is {0},即创建索引失败发生在事务结束阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) | ||
record.add_record("event is {0},The failure of index creation occurred during the transaction end phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) | ||
# 收集日志 | ||
# ddl_wait_trans_end_ctx_try_wait | ||
self.verbose("gather observer.log by {0}".format(self.trace_id)) | ||
work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/{0}_on_obs/".format(self.trace_id) | ||
# work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/checkpoint/" | ||
self.gather_log.set_parameters("scope", "observer") | ||
self.gather_log.grep("{0}".format(self.trace_id)) | ||
logs_name = self.gather_log.execute(save_path=work_path_ddl_wait_trans_end_ctx_try_wait) | ||
if logs_name is None or len(logs_name) <= 0: | ||
self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) | ||
return False | ||
record.add_record(" Log saving location:{0}".format(work_path_ddl_wait_trans_end_ctx_try_wait)) | ||
# record.add_suggest("创建索引失败发生在事务结束阶段,请上传{0}到OceanBase社区".format(self.store_dir)) | ||
record.add_suggest("The failure of index creation occurred during the transaction completion phase. Please upload {0} to the OceanBase community".format(self.store_dir)) | ||
|
||
elif self.event == 'index sstable build task finish': | ||
self.verbose("ok,event is index sstable build task finish") | ||
# record.add_record("event is {0},即创建索引失败发生在补数据阶段,此时要根据trace_id:{1}捞取observer日志".format(self.event,self.trace_id)) | ||
record.add_record("event is {0},The failure of index creation occurred during the data replenishment phase. In this case, the observer logs need to be retrieved based on the trace_id: {1}".format(self.event, self.trace_id)) | ||
self.verbose("gather observer.log by {0}".format(self.trace_id)) | ||
# 收集日志 | ||
# index_sstable_build_task_finish | ||
# self.verbose("__check_checkpoint") | ||
work_path_index_sstable_build_task_finish = self.store_dir + "/{0}_on_obs/".format(self.trace_id) | ||
self.gather_log.set_parameters("scope", "observer") | ||
self.gather_log.grep("{0}".format(self.trace_id)) | ||
logs_name = self.gather_log.execute(save_path=work_path_index_sstable_build_task_finish) | ||
if logs_name is None or len(logs_name) <= 0: | ||
self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) | ||
return False | ||
record.add_record(" Log saving location:{0}".format(work_path_index_sstable_build_task_finish)) | ||
# record.add_suggest("创建索引失败发生在补数据阶段,请上传{0}到OceanBase社区".format(self.store_dir)) | ||
record.add_suggest("The index creation failed during the data replenishment phase. Please upload {0} to the OceanBase community".format(self.store_dir)) | ||
|
||
else: | ||
record.add_record("gather rootservice.log by {0}".format(self.trace_id)) | ||
# 收集RS日志 | ||
# rs | ||
self.verbose("event_data is None") | ||
self.verbose("gather rootservice.log by {0}".format(self.trace_id)) | ||
work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) | ||
self.gather_log.set_parameters("scope", "rootservice") | ||
self.gather_log.grep("{0}".format(self.trace_id)) | ||
logs_name = self.gather_log.execute(save_path=work_path_rs) | ||
if logs_name is None or len(logs_name) <= 0: | ||
self.verbose("no log_disk_full about trace_id:{0}".format(self.trace_id)) | ||
return False | ||
record.add_record("Log saving location:{0}".format(work_path_rs)) | ||
# record.add_suggest("创建索引失败发生在其他阶段,请上传{0}到OceanBase社区".format(self.store_dir)) | ||
record.add_suggest("The index creation failed during the other phase. Please upload {0} to the OceanBase community".format(self.store_dir)) | ||
|
||
self.Result.records.append(record) | ||
except Exception as e: | ||
raise RCAExecuteException("IndexDDLErrorScene execute error: {0}".format(e)) | ||
finally: | ||
self.stdio.verbose("end IndexDDLErrorScene execute") | ||
|
||
def export_result(self): | ||
super().export_result() | ||
|
||
def get_scene_info(self): | ||
return { | ||
"name": "index_ddl_error", | ||
"info_en": "Troubleshooting errors in indexing execution. ", | ||
"info_cn": '建索引执行报错问题排查', | ||
} | ||
|
||
|
||
index_ddl_error = IndexDDLErrorScene() |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这种注释掉的代码都删了吧