From f776b1b0fea5a9527d9e40012181f21585a973d6 Mon Sep 17 00:00:00 2001 From: Teingi Date: Thu, 11 Jul 2024 11:46:37 +0800 Subject: [PATCH 01/18] table dump print pretty result --- core.py | 2 ++ handler/gather/gather_tabledump.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/core.py b/core.py index 27297831..7d678312 100644 --- a/core.py +++ b/core.py @@ -239,6 +239,7 @@ def gather_function(self, function_type, opt): return handler.handle() elif function_type == 'gather_tabledump': handler = GatherTableDumpHandler(self.context) + return handler.handle() elif function_type == 'gather_parameters': handler = GatherParametersHandler(self.context) return handler.handle() @@ -290,6 +291,7 @@ def analyze_fuction(self, function_type, opt): elif function_type == 'analyze_sql_review': self.set_context(function_type, 'analyze', config) handler = AnalyzeSQLReviewHandler(self.context) + handler.handle() elif function_type == 'analyze_parameter_non_default': self.set_context(function_type, 'analyze', config) handler = AnalyzeParameterHandler(self.context, 'non_default') diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 52ab0538..61196405 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -17,6 +17,7 @@ """ import os +import time from stdio import SafeStdio from common.ob_connector import OBConnector from common.tool import StringUtils @@ -93,12 +94,13 @@ def init_config(self): return False def handle(self): + self.start_time = time.time() if not self.init_config(): self.stdio.error('init config failed') return False self.execute() if not self.is_innner: - self.stdio.print("get table info finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(self.file_name) + Style.RESET_ALL + "'") + self.__print_result() def execute(self): try: @@ -227,3 +229,12 @@ def __extract_string(self, s): return s[at_index + 1 :] else: return s + + def __print_result(self): + self.end_time = time.time() + elapsed_time = self.end_time - self.start_time + data = [["Status", "Result Details", "Time"], ["Completed", self.file_name, f"{elapsed_time:.2f} s"]] + table = tabulate(data, headers="firstrow", tablefmt="grid") + self.stdio.print("\nAnalyze SQL Summary:") + self.stdio.print(table) + self.stdio.print("\n") From d77000a1b951b0ddc4f8e40132849d760aa2ec45 Mon Sep 17 00:00:00 2001 From: Teingi Date: Fri, 12 Jul 2024 11:29:52 +0800 Subject: [PATCH 02/18] Fix regression testing bugs --- dependencies/bin/obstack_x86_64_7 | Bin handler/analyzer/analyze_flt_trace.py | 3 ++- handler/gather/gather_obstack2.py | 1 - handler/gather/gather_perf.py | 9 +++++---- handler/gather/gather_plan_monitor.py | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) mode change 100644 => 100755 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100644 new mode 100755 diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index fb14ccae..8624baaf 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -192,7 +192,7 @@ def check_filename(filename): log_full_path = "{gather_path}/{log_name}".format(log_name=self.flt_trace_id, gather_path=gather_path) download_file(ssh_client, log_full_path, local_store_path, self.stdio) - def __get_offline_log_file(self, ssh_client, log_full_path, local_store_dir): + def __get_offline_log_file(self, ssh_client, log_path, local_store_dir): """ :param ssh_client, log_name :return: @@ -202,6 +202,7 @@ def __get_offline_log_file(self, ssh_client, log_full_path, local_store_dir): if self.flt_trace_id is not None and (len(log_name_list) > 0): grep_cmd = "grep -e '{grep_args}' {log_file} > {local_store_path} ".format(grep_args=self.flt_trace_id, log_file=' '.join(log_name_list), local_store_path=local_store_path) LocalClient(self.stdio).run(grep_cmd) + log_full_path = "{gather_path}/{log_name}".format(gather_path=log_path, log_name=self.flt_trace_id) download_file(ssh_client, log_full_path, local_store_path, self.stdio) def __get_log_name_list_offline(self): diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 2178a51e..e1167ccb 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -224,7 +224,6 @@ def __gather_obstack2_info(self, ssh_client, user, observer_pid, remote_gather_d ssh_client.exec_cmd(chown_cmd) self.stdio.verbose("gather obstack info on server {0}, run cmd = [su {1}, {2}]".format(ssh_client.get_name(), user, cmd)) ssh_client.ssh_invoke_shell_switch_user(user, cmd, 10) - ssh_client.exec_cmd("rm -rf /tmp/{0}".format(remote_gather_dir)) @staticmethod def __get_overall_summary(node_summary_tuple): diff --git a/handler/gather/gather_perf.py b/handler/gather/gather_perf.py index e9244425..1c2a71cc 100644 --- a/handler/gather/gather_perf.py +++ b/handler/gather/gather_perf.py @@ -22,7 +22,7 @@ import tabulate from common.command import get_observer_pid, mkdir, zip_dir, get_file_size, download_file, delete_file_force -from common.command import LocalClient, SshClient +from common.command import SshClient from common.constant import const from handler.base_shell_handler import BaseShellHandler from common.tool import Util @@ -118,9 +118,6 @@ def __handle_from_node(self, node, local_stored_path): resp = {"skip": False, "error": "", "gather_pack_path": ""} remote_ip = node.get("ip") if self.is_ssh else NetUtils.get_inner_ip(self.stdio) remote_user = node.get("ssh_username") - remote_password = node.get("ssh_password") - remote_port = node.get("ssh_port") - remote_private_key = node.get("ssh_key_file") self.stdio.verbose("Sending Collect Shell Command to node {0} ...".format(remote_ip)) DirectoryUtil.mkdir(path=local_stored_path, stdio=self.stdio) now_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S') @@ -167,17 +164,20 @@ def __handle_from_node(self, node, local_stored_path): def __gather_perf_sample(self, ssh_client, gather_path, pid_observer): try: + self.stdio.start_loading('gather perf sample') cmd = "cd {gather_path} && perf record -o sample.data -e cycles -c 100000000 -p {pid} -g -- sleep 20".format(gather_path=gather_path, pid=pid_observer) self.stdio.verbose("gather perf sample, run cmd = [{0}]".format(cmd)) ssh_client.exec_cmd(cmd) generate_data = "cd {gather_path} && perf script -i sample.data -F ip,sym -f > sample.viz".format(gather_path=gather_path) self.stdio.verbose("generate perf sample data, run cmd = [{0}]".format(generate_data)) ssh_client.exec_cmd(generate_data) + self.stdio.stop_loading('gather perf sample') except: self.stdio.error("generate perf sample data on server [{0}] failed".format(ssh_client.get_name())) def __gather_perf_flame(self, ssh_client, gather_path, pid_observer): try: + self.stdio.start_loading('gather perf flame') perf_cmd = "cd {gather_path} && perf record -o flame.data -F 99 -p {pid} -g -- sleep 20".format(gather_path=gather_path, pid=pid_observer) self.stdio.verbose("gather perf, run cmd = [{0}]".format(perf_cmd)) ssh_client.exec_cmd(perf_cmd) @@ -185,6 +185,7 @@ def __gather_perf_flame(self, ssh_client, gather_path, pid_observer): generate_data = "cd {gather_path} && perf script -i flame.data > flame.viz".format(gather_path=gather_path) self.stdio.verbose("generate perf data, run cmd = [{0}]".format(generate_data)) ssh_client.exec_cmd(generate_data) + self.stdio.stop_loading('gather perf flame') except: self.stdio.error("generate perf data on server [{0}] failed".format(ssh_client.get_name())) diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 454c49a5..38683d8e 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -259,7 +259,7 @@ def get_table_info(self, file_path): data = f.read() return data except Exception as e: - self.stdio.error(e) + self.stdio.warn(e) return None def report_schema(self, sql, tenant_name): From c46510ac129ae1e94f7807a070967472d836a3e3 Mon Sep 17 00:00:00 2001 From: Teingi Date: Fri, 12 Jul 2024 11:32:31 +0800 Subject: [PATCH 03/18] Fix regression testing bugs --- dependencies/bin/obstack_x86_64_7 | Bin 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100755 new mode 100644 From b62132f66452fab929dd98071a9c3f7855e1bda8 Mon Sep 17 00:00:00 2001 From: Teingi Date: Fri, 12 Jul 2024 11:47:07 +0800 Subject: [PATCH 04/18] Optimize logs --- dependencies/bin/obstack_x86_64_7 | Bin diag_cmd.py | 2 +- handler/analyzer/analyze_log.py | 2 +- handler/analyzer/analyze_parameter.py | 4 ++-- handler/analyzer/analyze_sql.py | 2 +- handler/analyzer/analyze_sql_review.py | 2 +- handler/analyzer/analyze_variable.py | 2 +- handler/gather/gather_ash_report.py | 2 +- handler/gather/gather_awr.py | 2 +- handler/gather/gather_log.py | 2 +- handler/gather/gather_obadmin.py | 2 +- handler/gather/gather_obproxy_log.py | 2 +- handler/gather/gather_obstack2.py | 2 +- handler/gather/gather_parameters.py | 2 +- handler/gather/gather_perf.py | 2 +- handler/gather/gather_scenes.py | 2 +- handler/gather/gather_sysstat.py | 2 +- handler/gather/gather_variables.py | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) mode change 100644 => 100755 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100644 new mode 100755 diff --git a/diag_cmd.py b/diag_cmd.py index 49b31fb1..24f8a6ec 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -744,7 +744,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from sql_audit ') + super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from file') self.parser.add_option('--host', type='string', help="tenant connection host") self.parser.add_option('--port', type='string', help="tenant connection port") self.parser.add_option('--password', type='string', help="tenant connection user password", default='') diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 0d4a9646..434211e6 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -110,7 +110,7 @@ def init_option(self): self.stdio.print('analyze log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if grep_option is not None: diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 97ca13cc..96c8c836 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -82,7 +82,7 @@ def init_option_non_default(self): offline_file_option = Util.get_option(options, 'file') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: @@ -105,7 +105,7 @@ def init_option_diff(self): offline_file_option = Util.get_option(options, 'file') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py index d54168e3..e6ab6374 100644 --- a/handler/analyzer/analyze_sql.py +++ b/handler/analyzer/analyze_sql.py @@ -161,7 +161,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_parrent_path = os.path.abspath(store_dir_option) output_option = Util.get_option(options, 'output') diff --git a/handler/analyzer/analyze_sql_review.py b/handler/analyzer/analyze_sql_review.py index 1b69f3eb..c4253705 100644 --- a/handler/analyzer/analyze_sql_review.py +++ b/handler/analyzer/analyze_sql_review.py @@ -91,7 +91,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_parrent_path = os.path.abspath(store_dir_option) output_option = Util.get_option(options, 'output') diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 43fc8d32..cf88e64f 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -75,7 +75,7 @@ def init_option(self): if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: diff --git a/handler/gather/gather_ash_report.py b/handler/gather/gather_ash_report.py index f6aa955e..fc1e4eb1 100644 --- a/handler/gather/gather_ash_report.py +++ b/handler/gather/gather_ash_report.py @@ -153,7 +153,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if sql_id_option: diff --git a/handler/gather/gather_awr.py b/handler/gather/gather_awr.py index 9e58d106..bec5b9e6 100644 --- a/handler/gather/gather_awr.py +++ b/handler/gather/gather_awr.py @@ -270,7 +270,7 @@ def init_option(self): self.stdio.print('gather log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py index 8bbbf412..f368cab5 100644 --- a/handler/gather/gather_log.py +++ b/handler/gather/gather_log.py @@ -118,7 +118,7 @@ def init_option(self): self.stdio.print('gather log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option is not None and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scope_option: diff --git a/handler/gather/gather_obadmin.py b/handler/gather/gather_obadmin.py index 39169fc5..a7c3da04 100644 --- a/handler/gather/gather_obadmin.py +++ b/handler/gather/gather_obadmin.py @@ -106,7 +106,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) if encrypt_option == "true": diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index 265e6446..efd54b37 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -121,7 +121,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scope_option: diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index e1167ccb..2ca09f70 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -69,7 +69,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) return True diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index bec7463e..066179c9 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -66,7 +66,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True diff --git a/handler/gather/gather_perf.py b/handler/gather/gather_perf.py index 1c2a71cc..db792d3d 100644 --- a/handler/gather/gather_perf.py +++ b/handler/gather/gather_perf.py @@ -70,7 +70,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) self.scope_option = Util.get_option(options, 'scope') diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index b782c672..d54e2f57 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -209,7 +209,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scene_option: diff --git a/handler/gather/gather_sysstat.py b/handler/gather/gather_sysstat.py index f9299c7d..a77dff57 100644 --- a/handler/gather/gather_sysstat.py +++ b/handler/gather/gather_sysstat.py @@ -71,7 +71,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) self.scope_option = Util.get_option(options, 'scope') diff --git a/handler/gather/gather_variables.py b/handler/gather/gather_variables.py index 55c790ba..f1e2ea99 100644 --- a/handler/gather/gather_variables.py +++ b/handler/gather/gather_variables.py @@ -64,7 +64,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True From 6228ba39199a34c14994fdd2e9dbbb0495b3f187 Mon Sep 17 00:00:00 2001 From: Teingi Date: Fri, 12 Jul 2024 11:47:28 +0800 Subject: [PATCH 05/18] Optimize logs --- dependencies/bin/obstack_x86_64_7 | Bin 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100755 new mode 100644 From 6b286e42b56f886294880cadd3d94309acf230a5 Mon Sep 17 00:00:00 2001 From: Teingi Date: Fri, 12 Jul 2024 11:55:25 +0800 Subject: [PATCH 06/18] Optimize logs --- handler/analyzer/analyze_parameter.py | 4 ++-- handler/analyzer/analyze_variable.py | 4 ++-- handler/gather/gather_parameters.py | 14 +++++++------- handler/gather/gather_tabledump.py | 2 +- handler/gather/gather_variables.py | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 96c8c836..47b51480 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -59,8 +59,8 @@ def get_version(self): try: observer_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) except Exception as e: - self.stdio.warn("AnalyzeHandler Failed to get observer version:{0}".format(e)) - self.stdio.verbose("AnalyzeHandler.init get observer version: {0}".format(observer_version)) + self.stdio.warn("failed to get observer version:{0}".format(e)) + self.stdio.verbose("get observer version: {0}".format(observer_version)) return observer_version def handle(self): diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index cf88e64f..478c3c3d 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -48,8 +48,8 @@ def __init__(self, context): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index 066179c9..ea553faf 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -47,8 +47,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -76,8 +76,8 @@ def get_version(self): try: observer_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) except Exception as e: - self.stdio.warn("GatherHandler Failed to get observer version:{0}".format(e)) - self.stdio.verbose("GatherHandler.init get observer version: {0}".format(observer_version)) + self.stdio.warn("failed to get observer version:{0}".format(e)) + self.stdio.verbose("get observer version: {0}".format(observer_version)) return observer_version def get_cluster_name(self): @@ -87,8 +87,8 @@ def get_cluster_name(self): cluster_info = self.obconn.execute_sql(sql) cluster_name = cluster_info[0][0] except Exception as e: - self.stdio.warn("RCAHandler Failed to get oceanbase cluster name:{0}".format(e)) - self.stdio.verbose("RCAHandler.init get oceanbase cluster name {0}".format(cluster_name)) + self.stdio.warn("failed to get oceanbase cluster name:{0}".format(e)) + self.stdio.verbose("get oceanbase cluster name {0}".format(cluster_name)) return cluster_name def get_parameters_info(self): @@ -121,7 +121,7 @@ def get_parameters_info(self): writer.writerow(row) self.stdio.print("Gather parameters finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") else: - self.stdio.warn("Failed to retrieve the database version. Please check if the database connection is normal.") + self.stdio.warn("failed to retrieve the database version. Please check if the database connection is normal.") def execute(self): try: diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 61196405..834c80c4 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -112,7 +112,7 @@ def execute(self): self.__get_table_info_v3() except Exception as e: self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) - self.stdio.error("StepSQLHandler execute Exception: {0}".format(e).strip()) + self.stdio.error("GatherTableDumpHandler execute Exception: {0}".format(e).strip()) def __get_table_schema(self): sql = "show create table " + self.database + "." + self.table diff --git a/handler/gather/gather_variables.py b/handler/gather/gather_variables.py index f1e2ea99..34729a3b 100644 --- a/handler/gather/gather_variables.py +++ b/handler/gather/gather_variables.py @@ -46,8 +46,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -76,8 +76,8 @@ def get_cluster_name(self): cluster_info = self.obconn.execute_sql(sql) cluster_name = cluster_info[0][0] except Exception as e: - self.stdio.warn("RCAHandler Failed to get oceanbase cluster name:{0}".format(e)) - self.stdio.verbose("RCAHandler.init get oceanbase cluster name {0}".format(cluster_name)) + self.stdio.warn("failed to get oceanbase cluster name:{0}".format(e)) + self.stdio.verbose("get oceanbase cluster name {0}".format(cluster_name)) return cluster_name def get_variables_info(self): From 615f04da277d3b7f240e960af00c51071daf3a87 Mon Sep 17 00:00:00 2001 From: Teingi Date: Mon, 15 Jul 2024 17:03:36 +0800 Subject: [PATCH 07/18] fix: gather tabledump --- dependencies/bin/obstack_x86_64_7 | Bin diag_cmd.py | 2 +- handler/gather/gather_tabledump.py | 27 ++++++++++++++------------- 3 files changed, 15 insertions(+), 14 deletions(-) mode change 100644 => 100755 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100644 new mode 100755 diff --git a/diag_cmd.py b/diag_cmd.py index 24f8a6ec..06d83d65 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -622,7 +622,7 @@ def __init__(self): self.parser.add_option('--table', type='string', help="Specifies the name of the table in the database to operate on.") self.parser.add_option('--user', type='string', help="The username to use for the database connection.") self.parser.add_option('--password', type='string', help="The password for the database user. If not specified, an attempt will be made to connect without a password.", default='') - self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./gather_report') + self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./obdiag_gather_report') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) def init(self, cmd, args): diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 834c80c4..3b4484f2 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -44,18 +44,12 @@ def __init__(self, context, store_dir="./obdiag_gather_report", is_inner=False): self.result_list = [] self.store_dir = store_dir self.is_innner = is_inner - try: - if not os.path.exists(store_dir): - os.makedirs(store_dir) - except Exception as e: - self.stdio.error("init gather_report {0}".format(e)) - raise Exception("int gather_report {0}".format(e)) if self.context.get_variable("gather_timestamp", None): self.gather_timestamp = self.context.get_variable("gather_timestamp") else: self.gather_timestamp = TimeUtils.get_current_us_timestamp() - def init_config(self): + def init(self): try: self.ob_cluster = self.context.cluster_config self.obproxy_nodes = self.context.obproxy_config['servers'] @@ -68,7 +62,15 @@ def init_config(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - self.store_dir = Util.get_option(options, 'store_dir') + if not (self.database and self.database and user and password): + self.stdio.error("option --database/--table/--user/--password not found, please provide") + return False + store_dir_option = Util.get_option(options, 'store_dir') + if store_dir_option is not None and store_dir_option != './': + if not os.path.exists(os.path.abspath(store_dir_option)): + self.stdio.warn('args --store_dir [{0}]: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + os.makedirs(os.path.abspath(store_dir_option)) + self.store_dir = os.path.abspath(store_dir_option) if self.context.get_variable("gather_database", None): self.database = self.context.get_variable("gather_database") if self.context.get_variable("gather_table", None): @@ -87,7 +89,7 @@ def init_config(self): ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.ob_cluster.get("tenant_sys").get("user"), password=self.ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100 ) self.tenant_connector = OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=user, password=password, stdio=self.stdio, timeout=100) - self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.store_dir, self.gather_timestamp) + self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.store_dir, TimeUtils.timestamp_to_filename_time(self.gather_timestamp)) return True except Exception as e: self.stdio.error(e) @@ -95,8 +97,8 @@ def init_config(self): def handle(self): self.start_time = time.time() - if not self.init_config(): - self.stdio.error('init config failed') + if not self.init(): + self.stdio.error('init failed') return False self.execute() if not self.is_innner: @@ -111,8 +113,7 @@ def execute(self): else: self.__get_table_info_v3() except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) - self.stdio.error("GatherTableDumpHandler execute Exception: {0}".format(e).strip()) + self.stdio.error("report sql result to file: {0} failed, error: {1}".format(self.file_name, e)) def __get_table_schema(self): sql = "show create table " + self.database + "." + self.table From 38f0b69b6f6b621e6531187f3a784553d6d468ec Mon Sep 17 00:00:00 2001 From: Teingi Date: Mon, 15 Jul 2024 17:04:18 +0800 Subject: [PATCH 08/18] fix: gather tabledump --- dependencies/bin/obstack_x86_64_7 | Bin 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100755 new mode 100644 From 832e27a725dae29363e190d731087b26a88d7798 Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 11:23:43 +0800 Subject: [PATCH 09/18] fix analyze flt_trace offline --- handler/analyzer/analyze_flt_trace.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index 8624baaf..cac530a1 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -202,8 +202,6 @@ def __get_offline_log_file(self, ssh_client, log_path, local_store_dir): if self.flt_trace_id is not None and (len(log_name_list) > 0): grep_cmd = "grep -e '{grep_args}' {log_file} > {local_store_path} ".format(grep_args=self.flt_trace_id, log_file=' '.join(log_name_list), local_store_path=local_store_path) LocalClient(self.stdio).run(grep_cmd) - log_full_path = "{gather_path}/{log_name}".format(gather_path=log_path, log_name=self.flt_trace_id) - download_file(ssh_client, log_full_path, local_store_path, self.stdio) def __get_log_name_list_offline(self): """ From 2cb2a6448c679031605ed5bc4983d30c1e765476 Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 16:09:56 +0800 Subject: [PATCH 10/18] remove duplicate code & optimize log --- dependencies/bin/obstack_x86_64_7 | Bin diag_cmd.py | 4 ---- handler/analyzer/analyze_variable.py | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) mode change 100644 => 100755 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100644 new mode 100755 diff --git a/diag_cmd.py b/diag_cmd.py index cc54650a..e6f18c84 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -901,8 +901,6 @@ def __init__(self): self.register_command(ObdiagGatherObproxyLogCommand()) self.register_command(ObdiagGatherSceneCommand()) self.register_command(ObdiagGatherAshReportCommand()) - self.register_command(ObdiagGatherParameterCommand()) - self.register_command(ObdiagGatherVariableCommand()) self.register_command(ObdiagGatherTableDumpHandler()) self.register_command(ObdiagGatherParameterCommand()) self.register_command(ObdiagGatherVariableCommand()) @@ -926,8 +924,6 @@ def __init__(self): self.register_command(ObdiagAnalyzeVariableCommand()) self.register_command(ObdiagAnalyzeSQLCommand()) self.register_command(ObdiagAnalyzeSQLReviewCommand()) - self.register_command(ObdiagAnalyzeParameterCommand()) - self.register_command(ObdiagAnalyzeVariableCommand()) class ObdiagRCACommand(MajorCommand): diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 4058c868..7eafccb9 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -88,7 +88,7 @@ def init_option(self): self.variable_file_name = os.path.abspath(offline_file_option) self.check_file_valid() else: - self.stdio.error("an initialization variable file must be provided to find the parts where variables have changed.") + self.stdio.error("args --file need provided to find the parts where variables have changed.") exit(-1) if store_dir_option and store_dir_option != "./": From 44d000b08d73e8f1c1327635b83d8ef4c211629e Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 16:10:05 +0800 Subject: [PATCH 11/18] remove duplicate code & optimize log --- dependencies/bin/obstack_x86_64_7 | Bin 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100755 new mode 100644 From 67fd60d7d54deb70d37fa74c897710382b82bdce Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 16:21:13 +0800 Subject: [PATCH 12/18] fix: command auto-completion --- dependencies/bin/obstack_x86_64_7 | Bin init_obdiag_cmd.sh | 4 +++- 2 files changed, 3 insertions(+), 1 deletion(-) mode change 100644 => 100755 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100644 new mode 100755 diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index 86676960..48d812ed 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -21,7 +21,9 @@ _obdiag_completion() { if [ "$COMP_CWORD" -eq 2 ]; then type_list="log flt_trace sql sql_review parameter variable" elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then - type_list="diff non-default" + type_list="diff default" + elif [ "${COMP_WORDS[2]}" = "variable" ] && [ "$COMP_CWORD" -eq 3 ]; then + type_list="diff" fi ;; rca) From 848e902259cae705e693fd850404dc3430813e93 Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 16:21:24 +0800 Subject: [PATCH 13/18] fix: command auto-completion --- dependencies/bin/obstack_x86_64_7 | Bin 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dependencies/bin/obstack_x86_64_7 diff --git a/dependencies/bin/obstack_x86_64_7 b/dependencies/bin/obstack_x86_64_7 old mode 100755 new mode 100644 From d836af6279a664547e9b95ac2d851d2b86d2008f Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 16:53:52 +0800 Subject: [PATCH 14/18] fix: command auto-completion --- init_obdiag_cmd.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index 48d812ed..dc292fcf 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -40,7 +40,10 @@ _obdiag_completion() { type_list="list run" COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "parameter" ]; then - type_list="diff non-default" + type_list="diff default" + COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) + elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "variable" ]; then + type_list="diff" COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) fi ;; From 3bbace4f19324a3eb9ed37ace43628e3b286956a Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 17:58:41 +0800 Subject: [PATCH 15/18] fix analyze_parameter/analyze_variable log --- handler/analyzer/analyze_parameter.py | 2 +- handler/analyzer/analyze_variable.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 0cd4e58b..bb2d8117 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -135,7 +135,7 @@ def init_option_diff(self): if offline_file_option: if not os.path.exists(os.path.abspath(offline_file_option)): self.stdio.error('args --file [{0}] not exist: No such file, Please specify it again'.format(os.path.abspath(offline_file_option))) - exit(-1) + return False else: self.parameter_file_name = os.path.abspath(offline_file_option) self.check_file_valid() diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index ca07ede4..fd2255dc 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -83,13 +83,13 @@ def init_option(self): if offline_file_option: if not os.path.exists(os.path.abspath(offline_file_option)): self.stdio.error('args --file [{0}] not exist: No such file, Please specify it again'.format(os.path.abspath(offline_file_option))) - exit(-1) + return False else: self.variable_file_name = os.path.abspath(offline_file_option) self.check_file_valid() else: self.stdio.error("args --file need provided to find the parts where variables have changed.") - exit(-1) + return False if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): From 57a6dd399a7dd8ecc2ff1fda2425737f7534e375 Mon Sep 17 00:00:00 2001 From: Teingi Date: Tue, 16 Jul 2024 20:34:37 +0800 Subject: [PATCH 16/18] add gathering scenario sql --- handler/gather/tasks/observer/clog_disk_full.yaml | 9 +++++++++ handler/gather/tasks/observer/compaction.yaml | 9 +++++++++ .../tasks/observer/delay_of_primary_and_backup.yaml | 6 ++++++ handler/gather/tasks/observer/log_archive.yaml | 3 +++ handler/gather/tasks/observer/long_transaction.yaml | 3 +++ handler/gather/tasks/observer/memory.yaml | 5 ++++- handler/gather/tasks/observer/recovery.yaml | 3 +++ handler/gather/tasks/observer/rootservice_switch.yaml | 3 +++ handler/gather/tasks/observer/suspend_transaction.yaml | 3 +++ handler/gather/tasks/observer/unit_data_imbalance.yaml | 5 ++++- handler/gather/tasks/observer/unknown.yaml | 9 +++++++++ 11 files changed, 56 insertions(+), 2 deletions(-) diff --git a/handler/gather/tasks/observer/clog_disk_full.yaml b/handler/gather/tasks/observer/clog_disk_full.yaml index ecedeab0..3131b991 100644 --- a/handler/gather/tasks/observer/clog_disk_full.yaml +++ b/handler/gather/tasks/observer/clog_disk_full.yaml @@ -105,6 +105,15 @@ task: - type: sql sql: "select * from oceanbase.DBA_OB_SERVER_EVENT_HISTORY where event like '%migrat%' and name6 like '%fail%' and value6=1;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: ssh ssh: "df -h" global: false diff --git a/handler/gather/tasks/observer/compaction.yaml b/handler/gather/tasks/observer/compaction.yaml index 87f9dbbf..f59fcbcb 100644 --- a/handler/gather/tasks/observer/compaction.yaml +++ b/handler/gather/tasks/observer/compaction.yaml @@ -150,6 +150,15 @@ task: - type: sql sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml index 70a52e65..2219d22b 100644 --- a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml +++ b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml @@ -156,6 +156,12 @@ task: - type: sql sql: "SELECT LS_ID, SCN_TO_TIMESTAMP(END_SCN) FROM oceanbase.GV$OB_LOG_STAT WHERE ROLE = 'LEADER';" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/log_archive.yaml b/handler/gather/tasks/observer/log_archive.yaml index 2d2908f0..c6d7c9ae 100644 --- a/handler/gather/tasks/observer/log_archive.yaml +++ b/handler/gather/tasks/observer/log_archive.yaml @@ -120,6 +120,9 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_ARCHIVELOG_PIECE_FILES limit 20" global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/long_transaction.yaml b/handler/gather/tasks/observer/long_transaction.yaml index cbd4c9a4..c59a2de8 100644 --- a/handler/gather/tasks/observer/long_transaction.yaml +++ b/handler/gather/tasks/observer/long_transaction.yaml @@ -84,6 +84,9 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND STATE = 'INIT';" global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/memory.yaml b/handler/gather/tasks/observer/memory.yaml index f2650f19..cd96d560 100644 --- a/handler/gather/tasks/observer/memory.yaml +++ b/handler/gather/tasks/observer/memory.yaml @@ -94,7 +94,10 @@ task: sql: "show parameters like '%syslog_io_bandwidth_limit%';" global: true - type: sql - sql: "select * from oceanbase.GV$OB_MEMSTORE limit 20" + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" global: true - type: ssh # 可看到租户的规格、线程、队列及请求统计等信息,且这条日志每个租户每 30s 打印一次 ssh: "grep 'dump tenant info.tenant=' ${observer_data_dir}/log/observer.log | sed 's/,/,/g'" diff --git a/handler/gather/tasks/observer/recovery.yaml b/handler/gather/tasks/observer/recovery.yaml index 1d858159..10ef5441 100644 --- a/handler/gather/tasks/observer/recovery.yaml +++ b/handler/gather/tasks/observer/recovery.yaml @@ -89,6 +89,9 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_RESTORE_HISTORY limit 20;" global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/rootservice_switch.yaml b/handler/gather/tasks/observer/rootservice_switch.yaml index be72be4e..c0ea7f71 100644 --- a/handler/gather/tasks/observer/rootservice_switch.yaml +++ b/handler/gather/tasks/observer/rootservice_switch.yaml @@ -132,6 +132,9 @@ task: - type: sql sql: "SELECT TENANT_NAME, TENANT_ID, TENANT_ROLE, STATUS, SWITCHOVER_STATUS FROM oceanbase.DBA_OB_TENANTS" global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/suspend_transaction.yaml b/handler/gather/tasks/observer/suspend_transaction.yaml index 59001c27..5f2d45cb 100644 --- a/handler/gather/tasks/observer/suspend_transaction.yaml +++ b/handler/gather/tasks/observer/suspend_transaction.yaml @@ -81,6 +81,9 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND (STATE = 'PREPARE' OR STATE = 'REDO COMPLETE' OR STATE ='PRECOMMIT');" global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/unit_data_imbalance.yaml b/handler/gather/tasks/observer/unit_data_imbalance.yaml index b9e7b54e..a81f5a10 100644 --- a/handler/gather/tasks/observer/unit_data_imbalance.yaml +++ b/handler/gather/tasks/observer/unit_data_imbalance.yaml @@ -154,7 +154,10 @@ task: sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true - type: sql - sql: "select svr_ip,total_size / 1024 / 1024 / 1024 total_G,free_size / 1024 / 1024 / 1024 free_G,(total_size - free_size) / 1024 / 1024 / 1024 used_G,(total_size - free_size) / total_size used_percentage FROM oceanbase.__all_virtual_disk_stat; " + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" global: true - type: log grep: "" diff --git a/handler/gather/tasks/observer/unknown.yaml b/handler/gather/tasks/observer/unknown.yaml index 3d22f9af..ad2978b0 100644 --- a/handler/gather/tasks/observer/unknown.yaml +++ b/handler/gather/tasks/observer/unknown.yaml @@ -69,6 +69,15 @@ task: - type: sql sql: "SELECT a.TENANT_NAME,a.TENANT_ID,b.SVR_IP FROM oceanbase.DBA_OB_TENANTS a, oceanbase.GV$OB_UNITS b WHERE a.TENANT_ID=b.TENANT_ID;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: sql sql: "show parameters like '%syslog_level%';" global: true From 858b3b2b07933d4ea3cacd8a2be9f25b2933b1a0 Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 17 Jul 2024 20:12:53 +0800 Subject: [PATCH 17/18] fix tabledump and analyze parameter --- handler/analyzer/analyze_parameter.py | 2 +- handler/gather/gather_parameters.py | 2 +- handler/gather/gather_tabledump.py | 38 +++++++++++-------- .../gather/tasks/observer/clog_disk_full.yaml | 9 ----- handler/gather/tasks/observer/compaction.yaml | 9 ----- .../observer/delay_of_primary_and_backup.yaml | 6 --- .../gather/tasks/observer/log_archive.yaml | 3 -- .../tasks/observer/long_transaction.yaml | 3 -- handler/gather/tasks/observer/memory.yaml | 5 +-- handler/gather/tasks/observer/recovery.yaml | 3 -- .../tasks/observer/rootservice_switch.yaml | 3 -- .../tasks/observer/suspend_transaction.yaml | 3 -- .../tasks/observer/unit_data_imbalance.yaml | 5 +-- handler/gather/tasks/observer/unknown.yaml | 9 ----- 14 files changed, 26 insertions(+), 74 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index bb2d8117..98f094a8 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -162,7 +162,7 @@ def analyze_parameter_default(self): report_default_tb.add_row([row[1], row[2], row[3], row[4], tenant_id, row[6], row[11], row[7]]) fp.write(report_default_tb.get_string() + "\n") self.stdio.print(report_default_tb.get_string()) - self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0}' ".format(file_name) + Style.RESET_ALL) + self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index 187fb779..0b9f8002 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -121,7 +121,7 @@ def get_parameters_info(self): writer.writerow(tmp_row) else: writer.writerow(row) - self.stdio.print("Gather parameters finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") + self.stdio.print("Gather parameters finished. For more details, please run cmd " + Fore.YELLOW + "cat '{0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") else: self.stdio.warn("Failed to retrieve the database version. Please check if the database connection is normal.") diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 3b4484f2..2a790e1f 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -62,8 +62,8 @@ def init(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - if not (self.database and self.database and user and password): - self.stdio.error("option --database/--table/--user/--password not found, please provide") + if not (self.database and self.table and user): + self.stdio.error("option --database/--table/--user not found, please provide") return False store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None and store_dir_option != './': @@ -100,27 +100,31 @@ def handle(self): if not self.init(): self.stdio.error('init failed') return False - self.execute() - if not self.is_innner: + excute_status = self.execute() + if not self.is_innner and excute_status: self.__print_result() def execute(self): try: self.version = get_observer_version(self.context) - self.__get_table_schema() - if self.version == "4.0.0.0" or StringUtils.compare_versions_greater(self.version, "4.0.0.0"): - self.__get_table_info() - else: - self.__get_table_info_v3() + if self.__get_table_schema(): + if self.version == "4.0.0.0" or StringUtils.compare_versions_greater(self.version, "4.0.0.0"): + return self.__get_table_info() + else: + return self.__get_table_info_v3() except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: {1}".format(self.file_name, e)) + self.stdio.error("report sql result failed, error: {0}".format(e)) def __get_table_schema(self): - sql = "show create table " + self.database + "." + self.table - columns, result = self.tenant_connector.execute_sql_return_columns_and_data(sql) - if result is None or len(result) == 0: - self.stdio.verbose("excute sql: {0}, result is None".format(sql)) - self.__report(sql, columns, result) + try: + sql = "show create table " + self.database + "." + self.table + columns, result = self.tenant_connector.execute_sql_return_columns_and_data(sql) + if result is None or len(result) == 0: + self.stdio.verbose("excute sql: {0}, result is None".format(sql)) + self.__report(sql, columns, result) + return True + except Exception as e: + self.stdio.error("show create table error {0}".format(e)) def __get_table_info(self): try: @@ -172,6 +176,7 @@ def __get_table_info(self): return self.stdio.print("data size {0}".format(result)) self.__report(query_data, columns, result) + return True except Exception as e: self.stdio.error("getTableInfo execute Exception: {0}".format(e).strip()) @@ -203,6 +208,7 @@ def __get_table_info_v3(self): return self.stdio.print("table count {0}".format(result)) self.__report(query_count, columns, result) + return True except Exception as e: self.stdio.error("getTableInfo execute Exception: {0}".format(e).strip()) @@ -215,7 +221,7 @@ def __report(self, sql, column_names, data): f.write('\n\n' + 'obclient > ' + sql + '\n') f.write(formatted_table) except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) + self.stdio.error("report sql result to file: {0} failed, error:{1} ".format(self.file_name, e)) def __extract_string(self, s): if '@' in s: diff --git a/handler/gather/tasks/observer/clog_disk_full.yaml b/handler/gather/tasks/observer/clog_disk_full.yaml index 3131b991..ecedeab0 100644 --- a/handler/gather/tasks/observer/clog_disk_full.yaml +++ b/handler/gather/tasks/observer/clog_disk_full.yaml @@ -105,15 +105,6 @@ task: - type: sql sql: "select * from oceanbase.DBA_OB_SERVER_EVENT_HISTORY where event like '%migrat%' and name6 like '%fail%' and value6=1;" global: true - - type: sql - sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" - global: true - - type: sql - sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" - global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: ssh ssh: "df -h" global: false diff --git a/handler/gather/tasks/observer/compaction.yaml b/handler/gather/tasks/observer/compaction.yaml index f59fcbcb..87f9dbbf 100644 --- a/handler/gather/tasks/observer/compaction.yaml +++ b/handler/gather/tasks/observer/compaction.yaml @@ -150,15 +150,6 @@ task: - type: sql sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true - - type: sql - sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" - global: true - - type: sql - sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" - global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml index 2219d22b..70a52e65 100644 --- a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml +++ b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml @@ -156,12 +156,6 @@ task: - type: sql sql: "SELECT LS_ID, SCN_TO_TIMESTAMP(END_SCN) FROM oceanbase.GV$OB_LOG_STAT WHERE ROLE = 'LEADER';" global: true - - type: sql - sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" - global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/log_archive.yaml b/handler/gather/tasks/observer/log_archive.yaml index c6d7c9ae..2d2908f0 100644 --- a/handler/gather/tasks/observer/log_archive.yaml +++ b/handler/gather/tasks/observer/log_archive.yaml @@ -120,9 +120,6 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_ARCHIVELOG_PIECE_FILES limit 20" global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/long_transaction.yaml b/handler/gather/tasks/observer/long_transaction.yaml index c59a2de8..cbd4c9a4 100644 --- a/handler/gather/tasks/observer/long_transaction.yaml +++ b/handler/gather/tasks/observer/long_transaction.yaml @@ -84,9 +84,6 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND STATE = 'INIT';" global: true - - type: sql - sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" - global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/memory.yaml b/handler/gather/tasks/observer/memory.yaml index cd96d560..f2650f19 100644 --- a/handler/gather/tasks/observer/memory.yaml +++ b/handler/gather/tasks/observer/memory.yaml @@ -94,10 +94,7 @@ task: sql: "show parameters like '%syslog_io_bandwidth_limit%';" global: true - type: sql - sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" - global: true - - type: sql # 大于1g的内存模块 - sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" + sql: "select * from oceanbase.GV$OB_MEMSTORE limit 20" global: true - type: ssh # 可看到租户的规格、线程、队列及请求统计等信息,且这条日志每个租户每 30s 打印一次 ssh: "grep 'dump tenant info.tenant=' ${observer_data_dir}/log/observer.log | sed 's/,/,/g'" diff --git a/handler/gather/tasks/observer/recovery.yaml b/handler/gather/tasks/observer/recovery.yaml index 10ef5441..1d858159 100644 --- a/handler/gather/tasks/observer/recovery.yaml +++ b/handler/gather/tasks/observer/recovery.yaml @@ -89,9 +89,6 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_RESTORE_HISTORY limit 20;" global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/rootservice_switch.yaml b/handler/gather/tasks/observer/rootservice_switch.yaml index c0ea7f71..be72be4e 100644 --- a/handler/gather/tasks/observer/rootservice_switch.yaml +++ b/handler/gather/tasks/observer/rootservice_switch.yaml @@ -132,9 +132,6 @@ task: - type: sql sql: "SELECT TENANT_NAME, TENANT_ID, TENANT_ROLE, STATUS, SWITCHOVER_STATUS FROM oceanbase.DBA_OB_TENANTS" global: true - - type: sql # 大于1g的内存模块 - sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" - global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/suspend_transaction.yaml b/handler/gather/tasks/observer/suspend_transaction.yaml index 5f2d45cb..59001c27 100644 --- a/handler/gather/tasks/observer/suspend_transaction.yaml +++ b/handler/gather/tasks/observer/suspend_transaction.yaml @@ -81,9 +81,6 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND (STATE = 'PREPARE' OR STATE = 'REDO COMPLETE' OR STATE ='PRECOMMIT');" global: true - - type: sql - sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" - global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/unit_data_imbalance.yaml b/handler/gather/tasks/observer/unit_data_imbalance.yaml index a81f5a10..b9e7b54e 100644 --- a/handler/gather/tasks/observer/unit_data_imbalance.yaml +++ b/handler/gather/tasks/observer/unit_data_imbalance.yaml @@ -154,10 +154,7 @@ task: sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true - type: sql - sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" - global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + sql: "select svr_ip,total_size / 1024 / 1024 / 1024 total_G,free_size / 1024 / 1024 / 1024 free_G,(total_size - free_size) / 1024 / 1024 / 1024 used_G,(total_size - free_size) / total_size used_percentage FROM oceanbase.__all_virtual_disk_stat; " global: true - type: log grep: "" diff --git a/handler/gather/tasks/observer/unknown.yaml b/handler/gather/tasks/observer/unknown.yaml index ad2978b0..3d22f9af 100644 --- a/handler/gather/tasks/observer/unknown.yaml +++ b/handler/gather/tasks/observer/unknown.yaml @@ -69,15 +69,6 @@ task: - type: sql sql: "SELECT a.TENANT_NAME,a.TENANT_ID,b.SVR_IP FROM oceanbase.DBA_OB_TENANTS a, oceanbase.GV$OB_UNITS b WHERE a.TENANT_ID=b.TENANT_ID;" global: true - - type: sql - sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" - global: true - - type: sql # 大于1g的内存模块 - sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" - global: true - - type: sql # 查看每台机器上的tablet分布 - sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" - global: true - type: sql sql: "show parameters like '%syslog_level%';" global: true From c4c060ed0c3b2efd9dddf7b6be75cddcca4c0dc6 Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 17 Jul 2024 20:15:21 +0800 Subject: [PATCH 18/18] fix tabledump and analyze parameter --- handler/gather/gather_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index 0b9f8002..187fb779 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -121,7 +121,7 @@ def get_parameters_info(self): writer.writerow(tmp_row) else: writer.writerow(row) - self.stdio.print("Gather parameters finished. For more details, please run cmd " + Fore.YELLOW + "cat '{0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") + self.stdio.print("Gather parameters finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") else: self.stdio.warn("Failed to retrieve the database version. Please check if the database connection is normal.")