From 45a8145cbb7006ec63e629cdeeca3e6fe9bc293a Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 10 Jul 2024 14:46:01 +0800 Subject: [PATCH 01/68] Command Completion --- handler/analyzer/analyze_sql.py | 4 ++++ init_obdiag_cmd.sh | 11 +++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py index d54168e3..0d9a2276 100644 --- a/handler/analyzer/analyze_sql.py +++ b/handler/analyzer/analyze_sql.py @@ -126,7 +126,11 @@ def init_config(self): def init_ob_version(self): self.stdio.print('get observer version start') +<<<<<<< HEAD + self.ob_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) +======= self.ob_version = get_observer_version(self.context) +>>>>>>> origin/master self.stdio.print('get observer version complete, version:{0}'.format(self.ob_version)) return True diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index 38247c22..86676960 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -12,13 +12,17 @@ _obdiag_completion() { case "${COMP_WORDS[1]}" in gather) if [ "$COMP_CWORD" -eq 2 ]; then - type_list="log clog slog plan_monitor stack perf sysstat obproxy_log all scene ash tabledump" + type_list="log clog slog plan_monitor stack perf sysstat obproxy_log all scene ash tabledump parameter variable" elif [ "${COMP_WORDS[2]}" = "scene" ] && [ "$COMP_CWORD" -eq 3 ]; then type_list="list run" fi ;; analyze) - type_list="log flt_trace sql sql_review" + if [ "$COMP_CWORD" -eq 2 ]; then + type_list="log flt_trace sql sql_review parameter variable" + elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then + type_list="diff non-default" + fi ;; rca) type_list="list run" @@ -33,6 +37,9 @@ _obdiag_completion() { if [ "${COMP_WORDS[1]}" = "gather" ] && [ "${COMP_WORDS[2]}" = "scene" ]; then type_list="list run" COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) + elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "parameter" ]; then + type_list="diff non-default" + COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) fi ;; *) From d30c4d76d5d9b8ee99f066f80a6ecc2c92376b3a Mon Sep 17 00:00:00 2001 From: Teingi Date: Wed, 10 Jul 2024 14:50:08 +0800 Subject: [PATCH 02/68] Command Completion --- handler/analyzer/analyze_sql.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py index 0d9a2276..d54168e3 100644 --- a/handler/analyzer/analyze_sql.py +++ b/handler/analyzer/analyze_sql.py @@ -126,11 +126,7 @@ def init_config(self): def init_ob_version(self): self.stdio.print('get observer version start') -<<<<<<< HEAD - self.ob_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) -======= self.ob_version = get_observer_version(self.context) ->>>>>>> origin/master self.stdio.print('get observer version complete, version:{0}'.format(self.ob_version)) return True From 21f92b3609a68e3cda86c0df7a66244486bf3667 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:57:28 +0800 Subject: [PATCH 03/68] fix: Command Completion (#317) * Command Completion * Command Completion --- init_obdiag_cmd.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index 38247c22..86676960 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -12,13 +12,17 @@ _obdiag_completion() { case "${COMP_WORDS[1]}" in gather) if [ "$COMP_CWORD" -eq 2 ]; then - type_list="log clog 
slog plan_monitor stack perf sysstat obproxy_log all scene ash tabledump" + type_list="log clog slog plan_monitor stack perf sysstat obproxy_log all scene ash tabledump parameter variable" elif [ "${COMP_WORDS[2]}" = "scene" ] && [ "$COMP_CWORD" -eq 3 ]; then type_list="list run" fi ;; analyze) - type_list="log flt_trace sql sql_review" + if [ "$COMP_CWORD" -eq 2 ]; then + type_list="log flt_trace sql sql_review parameter variable" + elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then + type_list="diff non-default" + fi ;; rca) type_list="list run" @@ -33,6 +37,9 @@ _obdiag_completion() { if [ "${COMP_WORDS[1]}" = "gather" ] && [ "${COMP_WORDS[2]}" = "scene" ]; then type_list="list run" COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) + elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "parameter" ]; then + type_list="diff non-default" + COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) fi ;; *) From bec8f64c27c265aa00e9e287408b10762fa7e95c Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:42:01 +0800 Subject: [PATCH 04/68] fix remote client (#318) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client --- common/ssh_client/remote_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py index a24146fd..6bcfdaf6 100644 --- a/common/ssh_client/remote_client.py +++ b/common/ssh_client/remote_client.py @@ -42,7 +42,7 @@ def __init__(self, context, node): self._sftp_client = None self._disabled_rsa_algorithms = None self.host_ip = self.node.get("ip") - self.username = self.node.get("username") + self.username = self.node.get("ssh_username") self.ssh_port = self.node.get("ssh_port") self.need_password = True self.password = self.node.get("ssh_password") From ca41b96ad7c2f81bd709fb2f6f93fbb697155f31 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 10 Jul 2024 20:41:11 +0800 Subject: [PATCH 05/68] fix operator example (#319) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml --- example/operator.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/operator.yml b/example/operator.yml index 05c997ee..3409639f 100644 --- a/example/operator.yml +++ b/example/operator.yml @@ -29,7 +29,7 @@ obcluster: container_name: observer home_path: /home/admin/oceanbase data_dir: /home/admin/oceanbase/store - redo_Dir: /home/admin/oceanbase/store + redo_dir: /home/admin/oceanbase/store ip: xx.xx.xx.xx global: # if running obdiag in kubernetes, please delete the kubernetes_config_file From 0ecda317fd8b31c618ebb60d0e754a61baf109da Mon Sep 17 00:00:00 2001 From: Teingi Date: Thu, 11 Jul 2024 00:11:18 +0800 Subject: [PATCH 06/68] SQL Monitor Report add table info --- handler/gather/gather_plan_monitor.py | 43 ++++++++++++++++--- handler/gather/gather_tabledump.py | 62 +++++++++++++++++---------- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 3eb4c83c..454c49a5 100644 --- a/handler/gather/gather_plan_monitor.py +++ 
b/handler/gather/gather_plan_monitor.py @@ -32,6 +32,7 @@ from common.tool import StringUtils from common.tool import FileUtil from common.tool import TimeUtils +from handler.gather.gather_tabledump import GatherTableDumpHandler class GatherPlanMonitorHandler(object): @@ -121,6 +122,7 @@ def handle_plan_monitor_from_ob(cluster_name): trace_id = trace[0] user_sql = trace[1] sql = trace[1] + tenant_name = trace[6] db_name = trace[8] plan_id = trace[9] tenant_id = trace[10] @@ -132,6 +134,7 @@ def handle_plan_monitor_from_ob(cluster_name): self.stdio.verbose("SVR_PORT : %s " % svr_port) self.stdio.verbose("DB: %s " % db_name) self.stdio.verbose("PLAN_ID: %s " % plan_id) + self.stdio.verbose("TENANT_NAME: %s " % tenant_name) self.stdio.verbose("TENANT_ID: %s " % tenant_id) sql_plan_monitor_svr_agg_template = self.sql_plan_monitor_svr_agg_template_sql() @@ -151,7 +154,8 @@ def handle_plan_monitor_from_ob(cluster_name): self.report_header() # 输出sql_audit的概要信息 self.stdio.verbose("[sql plan monitor report task] report sql_audit") - self.report_sql_audit() + if not self.report_sql_audit(): + return # 输出sql explain的信息 self.stdio.verbose("[sql plan monitor report task] report plan explain, sql: [{0}]".format(sql)) self.report_plan_explain(db_name, sql) @@ -160,7 +164,7 @@ def handle_plan_monitor_from_ob(cluster_name): self.report_plan_cache(plan_explain_sql) # 输出表结构的信息 self.stdio.verbose("[sql plan monitor report task] report table schema") - self.report_schema(user_sql) + self.report_schema(user_sql, tenant_name) self.init_monitor_stat() # 输出sql_audit的详细信息 self.stdio.verbose("[sql plan monitor report task] report sql_audit details") @@ -249,7 +253,16 @@ def __get_overall_summary(node_summary_tuple): summary_tab.append((cluster, "Error" if is_err else "Completed", "{0} s".format(int(consume_time)), pack_path)) return "\nGather Sql Plan Monitor Summary:\n" + tabulate.tabulate(summary_tab, headers=field_names, tablefmt="grid", showindex=False) - def report_schema(self, sql): + def get_table_info(self, file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = f.read() + return data + except Exception as e: + self.stdio.error(e) + return None + + def report_schema(self, sql, tenant_name): try: schemas = "" valid_words = [] @@ -262,10 +275,24 @@ def report_schema(self, sql): for t in valid_words: try: data = self.db_connector.execute_sql("show create table %s" % t) - schemas = schemas + "
<pre>{0}</pre>
".format(data[0][1]) - self.stdio.verbose("table schema: {0}".format(schemas)) + self.context.set_variable('gather_tenant_name', tenant_name) + self.context.set_variable('gather_database', self.db_conn.get("database")) + self.context.set_variable('gather_table', t) + self.context.set_variable('gather_user', self.db_conn.get("user")) + self.context.set_variable('gather_password', self.db_conn.get("password")) + self.context.set_variable('store_dir', self.local_stored_path) + self.context.set_variable('gather_timestamp', self.gather_timestamp) + handler = GatherTableDumpHandler(self.context, self.local_stored_path, is_inner=True) + handler.handle() except Exception as e: pass + table_info_file = os.path.join(self.local_stored_path, "obdiag_tabledump_result_{0}.txt".format(self.gather_timestamp)) + self.stdio.print("table info file path:{0}".format(table_info_file)) + table_info = self.get_table_info(table_info_file) + if table_info: + schemas = schemas + "
<pre>%s</pre>
" % table_info + if len(table_info_file) > 25: + FileUtil.rm(table_info_file) cursor = self.sys_connector.execute_sql_return_cursor("show variables like '%parallel%'") s = from_db_cursor(cursor) s.align = 'l' @@ -809,10 +836,14 @@ def report_sql_audit(self): self.stdio.verbose("select sql_audit from ob with SQL: %s", sql) try: sql_audit_result = self.sys_connector.execute_sql_pretty(sql) + if not sql_audit_result: + self.stdio.error("failed to find the related sql_audit for the given trace_id:{0}", self.trace_id) + return False self.stdio.verbose("sql_audit_result: %s", sql_audit_result) self.stdio.verbose("report sql_audit_result to file start ...") self.__report(sql_audit_result.get_html_string()) self.stdio.verbose("report sql_audit_result end") + return True except Exception as e: self.stdio.exception("sql_audit> %s" % sql) self.stdio.exception(repr(e)) @@ -839,6 +870,8 @@ def report_plan_explain(self, db_name, raw_sql): def report_sql_plan_monitor_dfo_op(self, sql): data_sql_plan_monitor_dfo_op = self.sys_connector.execute_sql_pretty(sql) + if len(data_sql_plan_monitor_dfo_op.rows) == 0: + self.stdio.warn("failed to find sql_plan_monitor data, please add hint /*+ monitor*/ to your SQL before executing it.") self.__report("
<h2>SQL_PLAN_MONITOR DFO 级调度时序汇总</h2>
") self.stdio.verbose("report SQL_PLAN_MONITOR DFO complete") cursor_sql_plan_monitor_dfo_op = self.sys_connector.execute_sql_return_cursor_dictionary(sql) diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index c0078824..52ab0538 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -25,13 +25,12 @@ from common.tool import Util from common.tool import TimeUtils from tabulate import tabulate -from handler.checker.check_exception import CheckException from colorama import Fore, Style class GatherTableDumpHandler(SafeStdio): - def __init__(self, context, task_type="observer", export_report_path="./gather_report"): + def __init__(self, context, store_dir="./obdiag_gather_report", is_inner=False): self.context = context self.stdio = context.stdio self.report = None @@ -42,10 +41,11 @@ def __init__(self, context, task_type="observer", export_report_path="./gather_r self.database = None self.table = None self.result_list = [] - self.export_report_path = export_report_path + self.store_dir = store_dir + self.is_innner = is_inner try: - if not os.path.exists(export_report_path): - os.makedirs(export_report_path) + if not os.path.exists(store_dir): + os.makedirs(store_dir) except Exception as e: self.stdio.error("init gather_report {0}".format(e)) raise Exception("int gather_report {0}".format(e)) @@ -67,15 +67,29 @@ def init_config(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - self.export_report_path = Util.get_option(options, 'store_dir') - self.tenant_name = self.__extract_string(user) + self.store_dir = Util.get_option(options, 'store_dir') + if self.context.get_variable("gather_database", None): + self.database = self.context.get_variable("gather_database") + if self.context.get_variable("gather_table", None): + self.table = self.context.get_variable("gather_table") + if self.context.get_variable("gather_user", None): + user = self.context.get_variable("gather_user") + if self.context.get_variable("gather_password", None): + password = self.context.get_variable("gather_password") + if self.context.get_variable("store_dir", None): + self.store_dir = self.context.get_variable("store_dir") + if self.context.get_variable("gather_tenant_name", None): + self.tenant_name = self.context.get_variable("gather_tenant_name") + else: + self.tenant_name = self.__extract_string(user) self.ob_connector = OBConnector( ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.ob_cluster.get("tenant_sys").get("user"), password=self.ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100 ) self.tenant_connector = OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=user, password=password, stdio=self.stdio, timeout=100) - self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.export_report_path, self.gather_timestamp) + self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.store_dir, self.gather_timestamp) return True except Exception as e: + self.stdio.error(e) return False def handle(self): @@ -83,7 +97,8 @@ def handle(self): self.stdio.error('init config failed') return False self.execute() - self.stdio.print("get table info finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(self.file_name) + Style.RESET_ALL + "'") + if not self.is_innner: + self.stdio.print("get table info finished. 
For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(self.file_name) + Style.RESET_ALL + "'") def execute(self): try: @@ -106,8 +121,9 @@ def __get_table_schema(self): def __get_table_info(self): try: - tenant_data = self.ob_connector.execute_sql_return_cursor_dictionary("select tenant_id from oceanbase.__all_tenant where tenant_name='{0}'".format(self.tenant_name)) - if tenant_data is None: + sql = "select tenant_id from oceanbase.__all_tenant where tenant_name='{0}'".format(self.tenant_name) + tenant_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql) + if tenant_data.rowcount == 0: self.stdio.error("tenant is None") return self.tenant_id = tenant_data.fetchall()[0].get("tenant_id") @@ -115,7 +131,7 @@ def __get_table_info(self): database_data = self.ob_connector.execute_sql_return_cursor_dictionary( "SELECT con_id as tenant_id, object_id as database_id, object_name as database_name FROM oceanbase.cdb_objects where OBJECT_TYPE = 'DATABASE' and con_id = '{0}' and object_name='{1}' ".format(self.tenant_id, self.database) ) - if database_data is None: + if database_data.rowcount == 0: self.stdio.error("database is None") return self.database_id = database_data.fetchall()[0].get("database_id") @@ -124,18 +140,20 @@ def __get_table_info(self): self.tenant_id, self.database_id, self.table ) ) - if table_data is None: + if table_data.rowcount == 0: self.stdio.error("table is None") return self.table_id = table_data.fetchall()[0].get("table_id") ## 查询行数 - query_count = "select /*+read_consistency(weak) QUERY_TIMEOUT(60000000) */ table_name as 'Table' , ifnull(num_rows,0) as num_rows from oceanbase.cdb_tables where con_id = '{0}' and owner = '{1}' and table_name = '{2}' order by num_rows desc limit 1".format( - self.tenant_id, self.database, self.table + query_count = ( + "select /*+read_consistency(weak) QUERY_TIMEOUT(60000000) */ table_name , ifnull(num_rows,0) as num_rows from oceanbase.cdb_tables where con_id = '{0}' and owner = '{1}' and table_name = '{2}' order by num_rows desc limit 1".format( + self.tenant_id, self.database, self.table + ) ) columns, result = self.ob_connector.execute_sql_return_columns_and_data(query_count) - if result is None: - self.stdio.error("line Count is None") + if result.count == 0: + self.stdio.error("line count is None") return self.stdio.print("table count {0}".format(result)) @@ -146,7 +164,7 @@ def __get_table_info(self): ) columns, result = self.ob_connector.execute_sql_return_columns_and_data(query_data) - if result is None: + if result.count == 0: self.stdio.error("dataSize is None") return self.stdio.print("data size {0}".format(result)) @@ -158,17 +176,17 @@ def __get_table_info(self): def __get_table_info_v3(self): try: tenant_data = self.ob_connector.execute_sql_return_cursor_dictionary("select tenant_id from oceanbase.__all_tenant where tenant_name='{0}'".format(self.tenant_name)) - if tenant_data is None: + if tenant_data.rowcount == 0: self.stdio.error("tenant is None") return self.tenant_id = tenant_data.fetchall()[0].get("tenant_id") database_data = self.ob_connector.execute_sql_return_cursor_dictionary("select tenant_id,database_id,database_name from oceanbase.gv$database where tenant_name = '{0}' and database_name = '{1}' ".format(self.tenant_name, self.database)) - if database_data is None: + if database_data.rowcount == 0: self.stdio.error("database is None") return self.database_id = database_data.fetchall()[0].get("database_id") table_data = self.ob_connector.execute_sql_return_cursor_dictionary("select * 
from oceanbase.__all_virtual_table where table_name='{0}' and database_id='{1}' and tenant_id='{2}'".format(self.table, self.database_id, self.tenant_id)) - if table_data is None: + if table_data.rowcount == 0: self.stdio.error("table is None") return self.table_id = table_data.fetchall()[0].get("table_id") @@ -177,7 +195,7 @@ def __get_table_info_v3(self): self.tenant_id, self.table_id, self.table ) columns, result = self.ob_connector.execute_sql_return_columns_and_data(query_count) - if result is None: + if result.count == 0: self.stdio.error("dataSize and line count is None") return self.stdio.print("table count {0}".format(result)) From 37550146a638dcc7f2f20e8756530b840fcbb554 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:52:49 +0800 Subject: [PATCH 07/68] table dump print pretty result (#322) --- core.py | 2 ++ handler/gather/gather_tabledump.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/core.py b/core.py index 27297831..7d678312 100644 --- a/core.py +++ b/core.py @@ -239,6 +239,7 @@ def gather_function(self, function_type, opt): return handler.handle() elif function_type == 'gather_tabledump': handler = GatherTableDumpHandler(self.context) + return handler.handle() elif function_type == 'gather_parameters': handler = GatherParametersHandler(self.context) return handler.handle() @@ -290,6 +291,7 @@ def analyze_fuction(self, function_type, opt): elif function_type == 'analyze_sql_review': self.set_context(function_type, 'analyze', config) handler = AnalyzeSQLReviewHandler(self.context) + handler.handle() elif function_type == 'analyze_parameter_non_default': self.set_context(function_type, 'analyze', config) handler = AnalyzeParameterHandler(self.context, 'non_default') diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 52ab0538..61196405 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -17,6 +17,7 @@ """ import os +import time from stdio import SafeStdio from common.ob_connector import OBConnector from common.tool import StringUtils @@ -93,12 +94,13 @@ def init_config(self): return False def handle(self): + self.start_time = time.time() if not self.init_config(): self.stdio.error('init config failed') return False self.execute() if not self.is_innner: - self.stdio.print("get table info finished. 
For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(self.file_name) + Style.RESET_ALL + "'") + self.__print_result() def execute(self): try: @@ -227,3 +229,12 @@ def __extract_string(self, s): return s[at_index + 1 :] else: return s + + def __print_result(self): + self.end_time = time.time() + elapsed_time = self.end_time - self.start_time + data = [["Status", "Result Details", "Time"], ["Completed", self.file_name, f"{elapsed_time:.2f} s"]] + table = tabulate(data, headers="firstrow", tablefmt="grid") + self.stdio.print("\nAnalyze SQL Summary:") + self.stdio.print(table) + self.stdio.print("\n") From 0154718ede19e719fd4850bace46477e261b8559 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:50:39 +0800 Subject: [PATCH 08/68] Fix regression testing bugs (#324) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs --- diag_cmd.py | 2 +- handler/analyzer/analyze_flt_trace.py | 3 ++- handler/analyzer/analyze_log.py | 2 +- handler/analyzer/analyze_parameter.py | 8 ++++---- handler/analyzer/analyze_sql.py | 2 +- handler/analyzer/analyze_sql_review.py | 2 +- handler/analyzer/analyze_variable.py | 6 +++--- handler/gather/gather_ash_report.py | 2 +- handler/gather/gather_awr.py | 2 +- handler/gather/gather_log.py | 2 +- handler/gather/gather_obadmin.py | 2 +- handler/gather/gather_obproxy_log.py | 2 +- handler/gather/gather_obstack2.py | 3 +-- handler/gather/gather_parameters.py | 16 ++++++++-------- handler/gather/gather_perf.py | 11 ++++++----- handler/gather/gather_plan_monitor.py | 2 +- handler/gather/gather_scenes.py | 2 +- handler/gather/gather_sysstat.py | 2 +- handler/gather/gather_tabledump.py | 2 +- handler/gather/gather_variables.py | 10 +++++----- 20 files changed, 42 insertions(+), 41 deletions(-) diff --git a/diag_cmd.py b/diag_cmd.py index 49b31fb1..24f8a6ec 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -744,7 +744,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from sql_audit ') + super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from file') self.parser.add_option('--host', type='string', help="tenant connection host") self.parser.add_option('--port', type='string', help="tenant connection port") self.parser.add_option('--password', type='string', help="tenant connection user password", default='') diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index fb14ccae..8624baaf 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -192,7 +192,7 @@ def check_filename(filename): log_full_path = "{gather_path}/{log_name}".format(log_name=self.flt_trace_id, gather_path=gather_path) download_file(ssh_client, log_full_path, local_store_path, self.stdio) - def __get_offline_log_file(self, ssh_client, log_full_path, local_store_dir): + def __get_offline_log_file(self, ssh_client, log_path, local_store_dir): """ :param ssh_client, log_name :return: @@ -202,6 +202,7 @@ def __get_offline_log_file(self, ssh_client, log_full_path, local_store_dir): if self.flt_trace_id is not None and (len(log_name_list) > 0): grep_cmd = "grep -e '{grep_args}' {log_file} > {local_store_path} ".format(grep_args=self.flt_trace_id, log_file=' '.join(log_name_list), 
local_store_path=local_store_path) LocalClient(self.stdio).run(grep_cmd) + log_full_path = "{gather_path}/{log_name}".format(gather_path=log_path, log_name=self.flt_trace_id) download_file(ssh_client, log_full_path, local_store_path, self.stdio) def __get_log_name_list_offline(self): diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 0d4a9646..434211e6 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -110,7 +110,7 @@ def init_option(self): self.stdio.print('analyze log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if grep_option is not None: diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 97ca13cc..47b51480 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -59,8 +59,8 @@ def get_version(self): try: observer_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) except Exception as e: - self.stdio.warn("AnalyzeHandler Failed to get observer version:{0}".format(e)) - self.stdio.verbose("AnalyzeHandler.init get observer version: {0}".format(observer_version)) + self.stdio.warn("failed to get observer version:{0}".format(e)) + self.stdio.verbose("get observer version: {0}".format(observer_version)) return observer_version def handle(self): @@ -82,7 +82,7 @@ def init_option_non_default(self): offline_file_option = Util.get_option(options, 'file') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: @@ -105,7 +105,7 @@ def init_option_diff(self): offline_file_option = Util.get_option(options, 'file') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py index d54168e3..e6ab6374 100644 --- a/handler/analyzer/analyze_sql.py +++ b/handler/analyzer/analyze_sql.py @@ -161,7 +161,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args 
--store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_parrent_path = os.path.abspath(store_dir_option) output_option = Util.get_option(options, 'output') diff --git a/handler/analyzer/analyze_sql_review.py b/handler/analyzer/analyze_sql_review.py index 1b69f3eb..c4253705 100644 --- a/handler/analyzer/analyze_sql_review.py +++ b/handler/analyzer/analyze_sql_review.py @@ -91,7 +91,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_parrent_path = os.path.abspath(store_dir_option) output_option = Util.get_option(options, 'output') diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 43fc8d32..478c3c3d 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -48,8 +48,8 @@ def __init__(self, context): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -75,7 +75,7 @@ def init_option(self): if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.export_report_path = os.path.abspath(store_dir_option) else: diff --git a/handler/gather/gather_ash_report.py b/handler/gather/gather_ash_report.py index f6aa955e..fc1e4eb1 100644 --- a/handler/gather/gather_ash_report.py +++ b/handler/gather/gather_ash_report.py @@ -153,7 +153,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if sql_id_option: diff --git a/handler/gather/gather_awr.py b/handler/gather/gather_awr.py index 9e58d106..bec5b9e6 100644 --- a/handler/gather/gather_awr.py +++ b/handler/gather/gather_awr.py @@ -270,7 +270,7 @@ def init_option(self): self.stdio.print('gather log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args 
--store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py index 8bbbf412..f368cab5 100644 --- a/handler/gather/gather_log.py +++ b/handler/gather/gather_log.py @@ -118,7 +118,7 @@ def init_option(self): self.stdio.print('gather log from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option is not None and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scope_option: diff --git a/handler/gather/gather_obadmin.py b/handler/gather/gather_obadmin.py index 39169fc5..a7c3da04 100644 --- a/handler/gather/gather_obadmin.py +++ b/handler/gather/gather_obadmin.py @@ -106,7 +106,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) if encrypt_option == "true": diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index 265e6446..efd54b37 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -121,7 +121,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scope_option: diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 2178a51e..2ca09f70 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -69,7 +69,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) 
os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) return True @@ -224,7 +224,6 @@ def __gather_obstack2_info(self, ssh_client, user, observer_pid, remote_gather_d ssh_client.exec_cmd(chown_cmd) self.stdio.verbose("gather obstack info on server {0}, run cmd = [su {1}, {2}]".format(ssh_client.get_name(), user, cmd)) ssh_client.ssh_invoke_shell_switch_user(user, cmd, 10) - ssh_client.exec_cmd("rm -rf /tmp/{0}".format(remote_gather_dir)) @staticmethod def __get_overall_summary(node_summary_tuple): diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index bec7463e..ea553faf 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -47,8 +47,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -66,7 +66,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True @@ -76,8 +76,8 @@ def get_version(self): try: observer_version = get_observer_version_by_sql(self.ob_cluster, self.stdio) except Exception as e: - self.stdio.warn("GatherHandler Failed to get observer version:{0}".format(e)) - self.stdio.verbose("GatherHandler.init get observer version: {0}".format(observer_version)) + self.stdio.warn("failed to get observer version:{0}".format(e)) + self.stdio.verbose("get observer version: {0}".format(observer_version)) return observer_version def get_cluster_name(self): @@ -87,8 +87,8 @@ def get_cluster_name(self): cluster_info = self.obconn.execute_sql(sql) cluster_name = cluster_info[0][0] except Exception as e: - self.stdio.warn("RCAHandler Failed to get oceanbase cluster name:{0}".format(e)) - self.stdio.verbose("RCAHandler.init get oceanbase cluster name {0}".format(cluster_name)) + self.stdio.warn("failed to get oceanbase cluster name:{0}".format(e)) + self.stdio.verbose("get oceanbase cluster name {0}".format(cluster_name)) return cluster_name def get_parameters_info(self): @@ -121,7 +121,7 @@ def get_parameters_info(self): writer.writerow(row) self.stdio.print("Gather parameters finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") else: - self.stdio.warn("Failed to retrieve the database version. Please check if the database connection is normal.") + self.stdio.warn("failed to retrieve the database version. 
Please check if the database connection is normal.") def execute(self): try: diff --git a/handler/gather/gather_perf.py b/handler/gather/gather_perf.py index e9244425..db792d3d 100644 --- a/handler/gather/gather_perf.py +++ b/handler/gather/gather_perf.py @@ -22,7 +22,7 @@ import tabulate from common.command import get_observer_pid, mkdir, zip_dir, get_file_size, download_file, delete_file_force -from common.command import LocalClient, SshClient +from common.command import SshClient from common.constant import const from handler.base_shell_handler import BaseShellHandler from common.tool import Util @@ -70,7 +70,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) self.scope_option = Util.get_option(options, 'scope') @@ -118,9 +118,6 @@ def __handle_from_node(self, node, local_stored_path): resp = {"skip": False, "error": "", "gather_pack_path": ""} remote_ip = node.get("ip") if self.is_ssh else NetUtils.get_inner_ip(self.stdio) remote_user = node.get("ssh_username") - remote_password = node.get("ssh_password") - remote_port = node.get("ssh_port") - remote_private_key = node.get("ssh_key_file") self.stdio.verbose("Sending Collect Shell Command to node {0} ...".format(remote_ip)) DirectoryUtil.mkdir(path=local_stored_path, stdio=self.stdio) now_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S') @@ -167,17 +164,20 @@ def __handle_from_node(self, node, local_stored_path): def __gather_perf_sample(self, ssh_client, gather_path, pid_observer): try: + self.stdio.start_loading('gather perf sample') cmd = "cd {gather_path} && perf record -o sample.data -e cycles -c 100000000 -p {pid} -g -- sleep 20".format(gather_path=gather_path, pid=pid_observer) self.stdio.verbose("gather perf sample, run cmd = [{0}]".format(cmd)) ssh_client.exec_cmd(cmd) generate_data = "cd {gather_path} && perf script -i sample.data -F ip,sym -f > sample.viz".format(gather_path=gather_path) self.stdio.verbose("generate perf sample data, run cmd = [{0}]".format(generate_data)) ssh_client.exec_cmd(generate_data) + self.stdio.stop_loading('gather perf sample') except: self.stdio.error("generate perf sample data on server [{0}] failed".format(ssh_client.get_name())) def __gather_perf_flame(self, ssh_client, gather_path, pid_observer): try: + self.stdio.start_loading('gather perf flame') perf_cmd = "cd {gather_path} && perf record -o flame.data -F 99 -p {pid} -g -- sleep 20".format(gather_path=gather_path, pid=pid_observer) self.stdio.verbose("gather perf, run cmd = [{0}]".format(perf_cmd)) ssh_client.exec_cmd(perf_cmd) @@ -185,6 +185,7 @@ def __gather_perf_flame(self, ssh_client, gather_path, pid_observer): generate_data = "cd {gather_path} && perf script -i flame.data > flame.viz".format(gather_path=gather_path) self.stdio.verbose("generate perf data, run cmd = [{0}]".format(generate_data)) ssh_client.exec_cmd(generate_data) + self.stdio.stop_loading('gather perf flame') except: self.stdio.error("generate perf data on server [{0}] failed".format(ssh_client.get_name())) diff --git a/handler/gather/gather_plan_monitor.py 
b/handler/gather/gather_plan_monitor.py index 454c49a5..38683d8e 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -259,7 +259,7 @@ def get_table_info(self, file_path): data = f.read() return data except Exception as e: - self.stdio.error(e) + self.stdio.warn(e) return None def report_schema(self, sql, tenant_name): diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index b782c672..d54e2f57 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -209,7 +209,7 @@ def init_option(self): self.stdio.print('gather from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) if store_dir_option: if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) if scene_option: diff --git a/handler/gather/gather_sysstat.py b/handler/gather/gather_sysstat.py index f9299c7d..a77dff57 100644 --- a/handler/gather/gather_sysstat.py +++ b/handler/gather/gather_sysstat.py @@ -71,7 +71,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.local_stored_path = os.path.abspath(store_dir_option) self.scope_option = Util.get_option(options, 'scope') diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 61196405..834c80c4 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -112,7 +112,7 @@ def execute(self): self.__get_table_info_v3() except Exception as e: self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) - self.stdio.error("StepSQLHandler execute Exception: {0}".format(e).strip()) + self.stdio.error("GatherTableDumpHandler execute Exception: {0}".format(e).strip()) def __get_table_schema(self): sql = "show create table " + self.database + "." 
+ self.table diff --git a/handler/gather/gather_variables.py b/handler/gather/gather_variables.py index 55c790ba..34729a3b 100644 --- a/handler/gather/gather_variables.py +++ b/handler/gather/gather_variables.py @@ -46,8 +46,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("Failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) + self.stdio.error("failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -64,7 +64,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True @@ -76,8 +76,8 @@ def get_cluster_name(self): cluster_info = self.obconn.execute_sql(sql) cluster_name = cluster_info[0][0] except Exception as e: - self.stdio.warn("RCAHandler Failed to get oceanbase cluster name:{0}".format(e)) - self.stdio.verbose("RCAHandler.init get oceanbase cluster name {0}".format(cluster_name)) + self.stdio.warn("failed to get oceanbase cluster name:{0}".format(e)) + self.stdio.verbose("get oceanbase cluster name {0}".format(cluster_name)) return cluster_name def get_variables_info(self): From 5950030a6695c7804e4a1a55607b1ffac54ab11d Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:26:30 +0800 Subject: [PATCH 09/68] fix get_obproxy_version on rca (#326) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml * fix get_obproxy_version on rca --- handler/rca/rca_handler.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/handler/rca/rca_handler.py b/handler/rca/rca_handler.py index 51da1a95..993bd173 100644 --- a/handler/rca/rca_handler.py +++ b/handler/rca/rca_handler.py @@ -99,12 +99,7 @@ def __init__(self, context): obproxy_version = "" try: if len(context_obproxy_nodes) > 0: - obproxy_version = get_obproxy_version( - True, - context_obproxy_nodes[0]["ssher"], - context_obproxy_nodes[0]["home_path"], - self.stdio, - ) + obproxy_version = get_obproxy_version(context) except Exception as e: self.stdio.warn("RCAHandler.init Failed to get obproxy version. 
Error:{0}".format(e)) if obproxy_version != "": From b3fe9d96098c3d0f68d36091fa4053d0406ab575 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:11:13 +0800 Subject: [PATCH 10/68] fix: gather tabledump (#328) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump --- diag_cmd.py | 2 +- handler/gather/gather_tabledump.py | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/diag_cmd.py b/diag_cmd.py index 24f8a6ec..06d83d65 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -622,7 +622,7 @@ def __init__(self): self.parser.add_option('--table', type='string', help="Specifies the name of the table in the database to operate on.") self.parser.add_option('--user', type='string', help="The username to use for the database connection.") self.parser.add_option('--password', type='string', help="The password for the database user. If not specified, an attempt will be made to connect without a password.", default='') - self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./gather_report') + self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./obdiag_gather_report') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) def init(self, cmd, args): diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 834c80c4..3b4484f2 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -44,18 +44,12 @@ def __init__(self, context, store_dir="./obdiag_gather_report", is_inner=False): self.result_list = [] self.store_dir = store_dir self.is_innner = is_inner - try: - if not os.path.exists(store_dir): - os.makedirs(store_dir) - except Exception as e: - self.stdio.error("init gather_report {0}".format(e)) - raise Exception("int gather_report {0}".format(e)) if self.context.get_variable("gather_timestamp", None): self.gather_timestamp = self.context.get_variable("gather_timestamp") else: self.gather_timestamp = TimeUtils.get_current_us_timestamp() - def init_config(self): + def init(self): try: self.ob_cluster = self.context.cluster_config self.obproxy_nodes = self.context.obproxy_config['servers'] @@ -68,7 +62,15 @@ def init_config(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - self.store_dir = Util.get_option(options, 'store_dir') + if not (self.database and self.database and user and password): + self.stdio.error("option --database/--table/--user/--password not found, please provide") + return False + store_dir_option = Util.get_option(options, 'store_dir') + if store_dir_option is not None and store_dir_option != './': + if not os.path.exists(os.path.abspath(store_dir_option)): + self.stdio.warn('args --store_dir [{0}]: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + os.makedirs(os.path.abspath(store_dir_option)) + self.store_dir = os.path.abspath(store_dir_option) if self.context.get_variable("gather_database", None): self.database = self.context.get_variable("gather_database") if self.context.get_variable("gather_table", None): @@ -87,7 +89,7 @@ def init_config(self): 
ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.ob_cluster.get("tenant_sys").get("user"), password=self.ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100 ) self.tenant_connector = OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=user, password=password, stdio=self.stdio, timeout=100) - self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.store_dir, self.gather_timestamp) + self.file_name = "{0}/obdiag_tabledump_result_{1}.txt".format(self.store_dir, TimeUtils.timestamp_to_filename_time(self.gather_timestamp)) return True except Exception as e: self.stdio.error(e) @@ -95,8 +97,8 @@ def init_config(self): def handle(self): self.start_time = time.time() - if not self.init_config(): - self.stdio.error('init config failed') + if not self.init(): + self.stdio.error('init failed') return False self.execute() if not self.is_innner: @@ -111,8 +113,7 @@ def execute(self): else: self.__get_table_info_v3() except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) - self.stdio.error("GatherTableDumpHandler execute Exception: {0}".format(e).strip()) + self.stdio.error("report sql result to file: {0} failed, error: {1}".format(self.file_name, e)) def __get_table_schema(self): sql = "show create table " + self.database + "." + self.table From 5e8cea2596d68b2b140b7b0d886dd0367dab58f5 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 15 Jul 2024 20:06:27 +0800 Subject: [PATCH 11/68] fix get_obproxy_version on rca (#330) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml * fix get_obproxy_version on rca * fix : cat with grep >> grep -e --- handler/gather/gather_log.py | 7 ++++--- handler/gather/gather_obproxy_log.py | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py index f368cab5..6001296a 100644 --- a/handler/gather/gather_log.py +++ b/handler/gather/gather_log.py @@ -304,7 +304,6 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): if type(self.grep_options) == str: grep_cmd = "grep -e '{grep_options}' {log_dir}/{log_name} >> {gather_path}/{log_name} ".format(grep_options=self.grep_options, gather_path=gather_path, log_name=log_name, log_dir=log_path) elif type(self.grep_options) == list and len(self.grep_options) > 0: - grep_litter_cmd = "" for grep_option in self.grep_options: if type(grep_option) != str: self.stdio.error('The grep args must be string or list of strings, but got {0}'.format(type(grep_option))) @@ -312,8 +311,10 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): elif grep_option == "": self.stdio.warn('The grep args must be string or list of strings, but got ""') continue - grep_litter_cmd += "| grep -e '{0}'".format(grep_option) - grep_cmd = "cat {log_dir}/{log_name} {grep_options} >> {gather_path}/{log_name} ".format(grep_options=grep_litter_cmd, gather_path=gather_path, log_name=log_name, log_dir=log_path) + if grep_cmd == "": + grep_cmd = "grep -e '{0}' ".format(grep_option) + "{log_dir}/{log_name}".format(log_name=log_name, log_dir=log_path) + grep_cmd += "| grep -e '{0}'".format(grep_option) + grep_cmd += " >> {gather_path}/{log_name} 
".format(gather_path=gather_path, log_name=log_name, log_dir=log_path) self.stdio.verbose('grep files, run cmd = [{0}]'.format(grep_cmd)) ssh_client.exec_cmd(grep_cmd) else: diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index efd54b37..9f7b60fd 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -263,7 +263,6 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): if type(self.grep_args) == str: grep_cmd = "grep -e '{grep_args}' {log_dir}/{log_name} >> {gather_path}/{log_name} ".format(grep_args=self.grep_args, gather_path=gather_path, log_name=log_name, log_dir=log_path) elif type(self.grep_args) == list and len(self.grep_args) > 0: - grep_litter_cmd = "" for grep_arg in self.grep_args: if type(grep_arg) != str: self.stdio.error('The grep args must be string or list of strings, but got {0}'.format(type(grep_arg))) @@ -271,9 +270,11 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): elif grep_arg == "": self.stdio.warn('The grep args must be string or list of strings, but got ""') continue - grep_litter_cmd += "| grep -e '{0}'".format(grep_arg) - - grep_cmd = "cat {log_dir}/{log_name} {grep_args} >> {gather_path}/{log_name} ".format(grep_args=grep_litter_cmd, gather_path=gather_path, log_name=log_name, log_dir=log_path) + if grep_cmd == "": + grep_cmd = "grep -e '{0}' ".format(grep_arg) + "{log_dir}/{log_name}".format(log_name=log_name, log_dir=log_path) + continue + grep_cmd += "| grep -e '{0}'".format(grep_arg) + grep_cmd += " >> {log_dir}/{log_name}".format(log_name=log_name, log_dir=log_path) self.stdio.verbose("grep files, run cmd = [{0}]".format(grep_cmd)) ssh_client.exec_cmd(grep_cmd) else: From 900b2e8bf76fb5e0c1387891ae8ccc5a542f6d71 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:28:53 +0800 Subject: [PATCH 12/68] fix stack (#331) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml * fix get_obproxy_version on rca * fix : cat with grep >> grep -e * fix stack --- common/command.py | 2 +- handler/gather/gather_obstack2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/command.py b/common/command.py index a045a6ae..2af96419 100644 --- a/common/command.py +++ b/common/command.py @@ -99,7 +99,7 @@ def upload_file(ssh_client, local_path, remote_path, stdio=None): """ stdio.verbose("Please wait a moment, upload file to server {0}, local file path {1}, remote file path {2}".format(ssh_client.get_name(), local_path, remote_path)) try: - ssh_client.upload(local_path, remote_path) + ssh_client.upload(remote_path, local_path) except Exception as e: stdio.error("Upload File Failed error: {0}".format(e)) diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 2ca09f70..9b0e8084 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -201,7 +201,7 @@ def __chmod_obstack2(self, ssh_client): def __is_obstack_exists(self, ssh_client): cmd = "test -e {file} && echo exists".format(file=const.OBSTACK2_DEFAULT_INSTALL_PATH) - stdout = ssh_client.exec_cmd(cmd)[0] + stdout = ssh_client.exec_cmd(cmd) if stdout == 'exists': return False else: From a8718bc20c75a4667748c16911298833b67371fd Mon Sep 17 00:00:00 2001 From: "jingshun.tq" 
From 900b2e8bf76fb5e0c1387891ae8ccc5a542f6d71 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:28:53 +0800 Subject: [PATCH 12/68] fix stack (#331) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml * fix get_obproxy_version on rca * fix : cat with grep >> grep -e * fix stack --- common/command.py | 2 +- handler/gather/gather_obstack2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/command.py b/common/command.py index a045a6ae..2af96419 100644 --- a/common/command.py +++ b/common/command.py @@ -99,7 +99,7 @@ def upload_file(ssh_client, local_path, remote_path, stdio=None): """ stdio.verbose("Please wait a moment, upload file to server {0}, local file path {1}, remote file path {2}".format(ssh_client.get_name(), local_path, remote_path)) try: - ssh_client.upload(local_path, remote_path) + ssh_client.upload(remote_path, local_path) except Exception as e: stdio.error("Upload File Failed error: {0}".format(e)) diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 2ca09f70..9b0e8084 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -201,7 +201,7 @@ def __chmod_obstack2(self, ssh_client): def __is_obstack_exists(self, ssh_client): cmd = "test -e {file} && echo exists".format(file=const.OBSTACK2_DEFAULT_INSTALL_PATH) - stdout = ssh_client.exec_cmd(cmd)[0] + stdout = ssh_client.exec_cmd(cmd) if stdout == 'exists': return False else: From a8718bc20c75a4667748c16911298833b67371fd Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:32:04 +0800 Subject: [PATCH 13/68] fixed: analyze flt_trace offline (#332) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline --- handler/analyzer/analyze_flt_trace.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index 8624baaf..cac530a1 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -202,8 +202,6 @@ def __get_offline_log_file(self, ssh_client, log_path, local_store_dir): if self.flt_trace_id is not None and (len(log_name_list) > 0): grep_cmd = "grep -e '{grep_args}' {log_file} > {local_store_path} ".format(grep_args=self.flt_trace_id, log_file=' '.join(log_name_list), local_store_path=local_store_path) LocalClient(self.stdio).run(grep_cmd) - log_full_path = "{gather_path}/{log_name}".format(gather_path=log_path, log_name=self.flt_trace_id) - download_file(ssh_client, log_full_path, local_store_path, self.stdio) def __get_log_name_list_offline(self): """ From 9c5031fdbdb218f70c6f839712f9d5b45947e1b3 Mon Sep 17 00:00:00 2001 From: sunpeng <35529415+oraclebird@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:15:26 +0800 Subject: [PATCH 14/68] obdiag: implement parameter and variable collection and comparison (#329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * add a header row to the CSV output * obdiag gather and analyze parameters/variables * add file validity checks * add file validity checks * reword the parameter range description * fix duplicate items and descriptions --- core.py | 24 +++++----- diag_cmd.py | 64 ++++++++++++++++----------- handler/analyzer/analyze_parameter.py | 43 +++++++++++++----- handler/analyzer/analyze_variable.py | 28 +++++++++--- handler/gather/gather_parameters.py | 8 ++-- handler/gather/gather_variables.py | 8 ++-- 6 files changed, 114 insertions(+), 61 deletions(-) diff --git a/core.py b/core.py index 7d678312..73f8a956 100644 --- a/core.py +++ b/core.py @@ -284,25 +284,25 @@ def analyze_fuction(self, function_type, opt): self.set_context(function_type, 'analyze', config) handler = AnalyzeFltTraceHandler(self.context) handler.handle() - elif function_type == 'analyze_sql': - self.set_context(function_type, 'analyze', config) - handler = AnalyzeSQLHandler(self.context) - handler.handle() - elif function_type == 'analyze_sql_review': - self.set_context(function_type, 'analyze', config) - handler = AnalyzeSQLReviewHandler(self.context) - handler.handle() - elif function_type == 'analyze_parameter_non_default': + elif function_type == 'analyze_parameter_default': self.set_context(function_type, 'analyze', config) - handler = AnalyzeParameterHandler(self.context, 'non_default') + handler = AnalyzeParameterHandler(self.context, 'default') handler.handle() elif function_type == 'analyze_parameter_diff': self.set_context_skip_cluster_conn(function_type, 'analyze', config) handler = AnalyzeParameterHandler(self.context, 'diff') handler.handle() - elif function_type == 'analyze_variable': + elif function_type == 'analyze_variable_diff': self.set_context(function_type,
'analyze', config) - handler = AnalyzeVariableHandler(self.context) + handler = AnalyzeVariableHandler(self.context, 'diff') + handler.handle() + elif function_type == 'analyze_sql': + self.set_context(function_type, 'analyze', config) + handler = AnalyzeSQLHandler(self.context) + handler.handle() + elif function_type == 'analyze_sql_review': + self.set_context(function_type, 'analyze', config) + handler = AnalyzeSQLReviewHandler(self.context) handler.handle() else: self._call_stdio('error', 'Not support analyze function: {0}'.format(function_type)) diff --git a/diag_cmd.py b/diag_cmd.py index 06d83d65..cc54650a 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -697,20 +697,49 @@ def _do_command(self, obdiag): return obdiag.analyze_fuction('analyze_parameter_diff', self.opts) -class ObdiagAnalyzeParameterNonDefaultCommand(ObdiagOriginCommand): +class ObdiagAnalyzeParameterDefaultCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeParameterNonDefaultCommand, self).__init__('non-default', 'Analyze the parameter to identify parameters with non-default values') + super(ObdiagAnalyzeParameterDefaultCommand, self).__init__('default', 'Analyze the parameter to identify parameters with non-default values') self.parser.add_option('--file', type='string', help="specify initialization parameter file") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) def init(self, cmd, args): - super(ObdiagAnalyzeParameterNonDefaultCommand, self).init(cmd, args) + super(ObdiagAnalyzeParameterDefaultCommand, self).init(cmd, args) self.parser.set_usage('%s [options]' % self.prev_cmd) return self def _do_command(self, obdiag): - return obdiag.analyze_fuction('analyze_parameter_non_default', self.opts) + return obdiag.analyze_fuction('analyze_parameter_default', self.opts) + + +class ObdiagAnalyzeParameterCommand(MajorCommand): + def __init__(self): + super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze oceanbase parameters info') + self.register_command(ObdiagAnalyzeParameterDiffCommand()) + self.register_command(ObdiagAnalyzeParameterDefaultCommand()) + + +class ObdiagAnalyzeVariableDiffCommand(ObdiagOriginCommand): + def __init__(self): + super(ObdiagAnalyzeVariableDiffCommand, self).__init__('diff', 'Analyze and identify variables that have changed compared to the specified variable file') + self.parser.add_option('--file', type='string', help="specify initialization parameter file") + self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') + self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + + def init(self, cmd, args): + super(ObdiagAnalyzeVariableDiffCommand, self).init(cmd, args) + self.parser.set_usage('%s [options]' % self.prev_cmd) + return self + + def _do_command(self, obdiag): + return obdiag.analyze_fuction('analyze_variable_diff', self.opts) + + +class ObdiagAnalyzeVariableCommand(MajorCommand): + def __init__(self): + super(ObdiagAnalyzeVariableCommand, self).__init__('variable', 'Analyze oceanbase variables info') + self.register_command(ObdiagAnalyzeVariableDiffCommand()) class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand): @@ -764,29 +793,6 @@ def _do_command(self, obdiag): return 
obdiag.analyze_fuction('analyze_sql_review', self.opts) -class ObdiagAnalyzeParameterCommand(MajorCommand): - def __init__(self): - super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze oceanbase parameters info') - self.register_command(ObdiagAnalyzeParameterDiffCommand()) - self.register_command(ObdiagAnalyzeParameterNonDefaultCommand()) - - -class ObdiagAnalyzeVariableCommand(ObdiagOriginCommand): - def __init__(self): - super(ObdiagAnalyzeVariableCommand, self).__init__('variable', 'Analyze and identify variables that have changed compared to the specified variable file') - self.parser.add_option('--file', type='string', help="specify initialization parameter file") - self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') - self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) - - def init(self, cmd, args): - super(ObdiagAnalyzeVariableCommand, self).init(cmd, args) - self.parser.set_usage('%s [options]' % self.prev_cmd) - return self - - def _do_command(self, obdiag): - return obdiag.analyze_fuction('analyze_variable', self.opts) - - class ObdiagCheckCommand(ObdiagOriginCommand): def __init__(self): @@ -895,6 +901,8 @@ def __init__(self): self.register_command(ObdiagGatherObproxyLogCommand()) self.register_command(ObdiagGatherSceneCommand()) self.register_command(ObdiagGatherAshReportCommand()) + self.register_command(ObdiagGatherParameterCommand()) + self.register_command(ObdiagGatherVariableCommand()) self.register_command(ObdiagGatherTableDumpHandler()) self.register_command(ObdiagGatherParameterCommand()) self.register_command(ObdiagGatherVariableCommand()) @@ -914,6 +922,8 @@ def __init__(self): super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze oceanbase diagnostic info') self.register_command(ObdiagAnalyzeLogCommand()) self.register_command(ObdiagAnalyzeFltTraceCommand()) + self.register_command(ObdiagAnalyzeParameterCommand()) + self.register_command(ObdiagAnalyzeVariableCommand()) self.register_command(ObdiagAnalyzeSQLCommand()) self.register_command(ObdiagAnalyzeSQLReviewCommand()) self.register_command(ObdiagAnalyzeParameterCommand()) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 47b51480..a31ebd38 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -28,7 +28,7 @@ class AnalyzeParameterHandler(object): - def __init__(self, context, analyze_type='non_default'): + def __init__(self, context, analyze_type='default'): self.context = context self.stdio = self.context.stdio self.export_report_path = None @@ -64,8 +64,8 @@ def get_version(self): return observer_version def handle(self): - if self.analyze_type == 'non_default': - if not self.init_option_non_default(): + if self.analyze_type == 'default': + if not self.init_option_default(): self.stdio.error('init option failed') return False else: @@ -76,7 +76,23 @@ def handle(self): DirectoryUtil.mkdir(path=self.export_report_path, stdio=self.stdio) self.execute() - def init_option_non_default(self): + def check_file_valid(self): + with open(self.parameter_file_name, 'r') as f: + header = f.readline() + flag = 1 + if header: + header = header.strip() + if not header: + flag = 0 + if not header.startswith('VERSION'): + flag = 0 + if not header.endswith('ISDEFAULT'): + flag = 0 + if flag == 0: + self.stdio.error('args --file [{0}] is not a valid parameter file, 
Please specify it again'.format(os.path.abspath(self.parameter_file_name))) + exit(-1) + + def init_option_default(self): options = self.context.options store_dir_option = Util.get_option(options, 'store_dir') offline_file_option = Util.get_option(options, 'file') @@ -97,6 +113,7 @@ def init_option_non_default(self): exit(-1) else: self.parameter_file_name = os.path.abspath(offline_file_option) + self.check_file_valid() return True def init_option_diff(self): @@ -121,9 +138,10 @@ def init_option_diff(self): exit(-1) else: self.parameter_file_name = os.path.abspath(offline_file_option) + self.check_file_valid() return True - def analyze_parameter_non_default(self): + def analyze_parameter_default(self): observer_version = self.get_version() if StringUtils.compare_versions_greater(observer_version, "4.2.2.0"): if self.parameter_file_name is not None: @@ -144,7 +162,7 @@ def analyze_parameter_non_default(self): report_default_tb.add_row([row[1], row[2], row[3], row[4], tenant_id, row[6], row[11], row[7]]) fp.write(report_default_tb.get_string() + "\n") self.stdio.print(report_default_tb.get_string()) - self.stdio.print("Analyze parameter non-default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0}' ".format(file_name) + Style.RESET_ALL + "'") + self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0}' ".format(file_name) + Style.RESET_ALL + "'") else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") @@ -179,9 +197,9 @@ def analyze_parameter_non_default(self): fp.write(report_default_tb.get_string() + "\n") if not is_empty: self.stdio.print(report_default_tb.get_string()) - self.stdio.print("Analyze parameter non-default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") + self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") else: - self.stdio.print("Analyze parameter non-default finished. All parameter values are the same as the default values.") + self.stdio.print("Analyze parameter default finished. 
All parameter values are the same as the default values.") def alalyze_parameter_diff(self): if self.parameter_file_name is None: @@ -240,7 +258,10 @@ def alalyze_parameter_diff(self): if len(value_list) > 0: report_diff_tb = PrettyTable(["name", "diff"]) report_diff_tb.align["task_report"] = "l" - report_diff_tb.title = 'TENANT_ID:' + tenant + if tenant == 'CLUSTER': + report_diff_tb.title = 'SCOPE:' + tenant + else: + report_diff_tb.title = 'SCOPE:TENANT-' + tenant for value_dict in value_list: value_str_list = [] for value in value_dict['value_list']: @@ -258,8 +279,8 @@ def alalyze_parameter_diff(self): def execute(self): try: - if self.analyze_type == 'non_default': - self.analyze_parameter_non_default() + if self.analyze_type == 'default': + self.analyze_parameter_default() elif self.analyze_type == 'diff': self.alalyze_parameter_diff() except Exception as e: diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 478c3c3d..4058c868 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -26,11 +26,12 @@ class AnalyzeVariableHandler(object): - def __init__(self, context): + def __init__(self, context, analyze_type='diff'): self.context = context self.stdio = self.context.stdio self.export_report_path = None self.variable_file_name = None + self.analyze_type = analyze_type self.ob_cluster = self.context.cluster_config if self.context.get_variable("gather_timestamp", None): self.analyze_timestamp = self.context.get_variable("gather_timestamp") @@ -48,8 +49,8 @@ def __init__(self, context): database="oceanbase", ) except Exception as e: - self.stdio.error("failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) + self.stdio.error("Failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -59,6 +60,22 @@ def handle(self): DirectoryUtil.mkdir(path=self.export_report_path, stdio=self.stdio) self.execute() + def check_file_valid(self): + with open(self.variable_file_name, 'r') as f: + header = f.readline() + flag = 1 + if header: + header = header.strip() + if not header: + flag = 0 + if not header.startswith('VERSION'): + flag = 0 + if not header.endswith('RECORD_TIME'): + flag = 0 + if flag == 0: + self.stdio.error('args --file [{0}] is not a valid variable file, Please specify it again'.format(os.path.abspath(self.variable_file_name))) + exit(-1) + def init_option(self): options = self.context.options store_dir_option = Util.get_option(options, 'store_dir') @@ -69,6 +86,7 @@ def init_option(self): exit(-1) else: self.variable_file_name = os.path.abspath(offline_file_option) + self.check_file_valid() else: self.stdio.error("an initialization variable file must be provided to find the parts where variables have changed.") exit(-1) @@ -87,7 +105,7 @@ def init_option(self): return True - def alalyze_variable(self): + def analyze_variable(self): sql = '''select version(), tenant_id, zone, name,gmt_modified, value, flags, min_val, max_val, now() from oceanbase.__all_virtual_sys_variable order by 2, 4, 5''' db_variable_info = self.obconn.execute_sql(sql) @@ -131,6 +149,6 @@ def alalyze_variable(self): def execute(self): try: - self.alalyze_variable() + self.analyze_variable() except Exception as e: self.stdio.error("variable info analyze failed, error message: {0}".format(e)) diff --git a/handler/gather/gather_parameters.py 
b/handler/gather/gather_parameters.py index ea553faf..187fb779 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -47,8 +47,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) + self.stdio.error("Failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -110,8 +110,10 @@ def get_parameters_info(self): ''' parameter_info = self.obconn.execute_sql(sql) self.parameter_file_name = self.gather_pack_dir + '/{0}_parameters_{1}.csv'.format(cluster_name, TimeUtils.timestamp_to_filename_time(self.gather_timestamp)) + header = ['VERSION', 'SVR_IP', 'SVR_PORT', 'ZONE', 'SCOPE', 'TENANT_ID', 'NAME', 'VALUE', 'SECTION', 'EDIT_LEVEL', 'RECORD_TIME', 'DEFAULT_VALUE', 'ISDEFAULT'] with open(self.parameter_file_name, 'w', newline='') as file: writer = csv.writer(file) + writer.writerow(header) for row in parameter_info: if row[5] is None: tmp_row = [col for col in row] @@ -121,7 +123,7 @@ def get_parameters_info(self): writer.writerow(row) self.stdio.print("Gather parameters finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.parameter_file_name) + Style.RESET_ALL + "'") else: - self.stdio.warn("failed to retrieve the database version. Please check if the database connection is normal.") + self.stdio.warn("Failed to retrieve the database version. Please check if the database connection is normal.") def execute(self): try: diff --git a/handler/gather/gather_variables.py b/handler/gather/gather_variables.py index 34729a3b..6c49b538 100644 --- a/handler/gather/gather_variables.py +++ b/handler/gather/gather_variables.py @@ -46,8 +46,8 @@ def __init__(self, context, gather_pack_dir='./'): database="oceanbase", ) except Exception as e: - self.stdio.error("failed to connect to database: {0}".format(e)) - raise OBDIAGFormatException("failed to connect to database: {0}".format(e)) + self.stdio.error("Failed to connect to database: {0}".format(e)) + raise OBDIAGFormatException("Failed to connect to database: {0}".format(e)) def handle(self): if not self.init_option(): @@ -64,7 +64,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): - self.stdio.warn('args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + self.stdio.warn('warn: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) os.makedirs(os.path.abspath(store_dir_option)) self.gather_pack_dir = os.path.abspath(store_dir_option) return True @@ -86,8 +86,10 @@ def get_variables_info(self): from oceanbase.__all_virtual_sys_variable order by 2, 4, 5''' variable_info = self.obconn.execute_sql(sql) self.variable_file_name = self.gather_pack_dir + '/{0}_variables_{1}.csv'.format(cluster_name, TimeUtils.timestamp_to_filename_time(self.gather_timestamp)) + header = ['VERSION', 'TENANT_ID', 'ZONE', 'NAME', 'GMT_MODIFIED', 'VALUE', 'FLAGS', 'MIN_VALUE', 'MAX_VALUE', 'RECORD_TIME'] with open(self.variable_file_name, 'w', newline='') as file: writer = csv.writer(file) + writer.writerow(header) for row in variable_info: writer.writerow(row) 
self.stdio.print("Gather variables finished. For more details, please run cmd '" + Fore.YELLOW + "cat {0}".format(self.variable_file_name) + Style.RESET_ALL + "'") From 94d20d7e3c3e3344a0604450338055de1972b18e Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:12:21 +0800 Subject: [PATCH 15/68] remove duplicate code & optimize log (#333) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline * remove duplicate code & optimize log * remove duplicate code & optimize log --- diag_cmd.py | 4 ---- handler/analyzer/analyze_variable.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/diag_cmd.py b/diag_cmd.py index cc54650a..e6f18c84 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -901,8 +901,6 @@ def __init__(self): self.register_command(ObdiagGatherObproxyLogCommand()) self.register_command(ObdiagGatherSceneCommand()) self.register_command(ObdiagGatherAshReportCommand()) - self.register_command(ObdiagGatherParameterCommand()) - self.register_command(ObdiagGatherVariableCommand()) self.register_command(ObdiagGatherTableDumpHandler()) self.register_command(ObdiagGatherParameterCommand()) self.register_command(ObdiagGatherVariableCommand()) @@ -926,8 +924,6 @@ def __init__(self): self.register_command(ObdiagAnalyzeVariableCommand()) self.register_command(ObdiagAnalyzeSQLCommand()) self.register_command(ObdiagAnalyzeSQLReviewCommand()) - self.register_command(ObdiagAnalyzeParameterCommand()) - self.register_command(ObdiagAnalyzeVariableCommand()) class ObdiagRCACommand(MajorCommand): diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 4058c868..7eafccb9 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -88,7 +88,7 @@ def init_option(self): self.variable_file_name = os.path.abspath(offline_file_option) self.check_file_valid() else: - self.stdio.error("an initialization variable file must be provided to find the parts where variables have changed.") + self.stdio.error("args --file need provided to find the parts where variables have changed.") exit(-1) if store_dir_option and store_dir_option != "./": From f5f1691b3b3e785ba704cdadd34bf9a35a47fc4b Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:43:14 +0800 Subject: [PATCH 16/68] 2.3.0 build new image (#334) * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix KubernetesClient * fix "nodename nor servname provided" * delete build tag * fix ssh stdio print * fix remote_client * fix example operator.yml * fix get_obproxy_version on rca * fix : cat with grep >> grep -e * fix stack * new builder image * delete test branch --- .github/workflows/build_package.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml index d6a8be8c..f2079d4e 100644 --- a/.github/workflows/build_package.yml +++ b/.github/workflows/build_package.yml @@ -16,7 +16,7 @@ jobs: name: Run on CentOS 7 runs-on: ubuntu-latest container: - image: "centos:7" + image: "oceanbase/obdiag-builder:latest" steps: - name: Checkout code @@ -24,20 +24,10 @@ jobs: - name: Install dependencies run: | - rm -rf /etc/yum.repos.d/* - curl -o /etc/yum.repos.d/CentOS-Base.repo 
https://mirrors.aliyun.com/repo/Centos-7.repo - yum install -y wget - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --no-check-certificate - sh Miniconda3-latest-Linux-x86_64.sh -p /opt/miniconda3 -b export PATH=/opt/miniconda3/bin:$PATH - conda init - conda create --name obdiag python=3.8 -y source activate obdiag ldd --version - python3 -m pip install --upgrade pip setuptools wheel pip3 install -r requirements3.txt - python3 --version - yum install rpm-build -y - name: Build package run: | From 7a3c38661683fe83a8979b9efcc76a3887a0d67e Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:09:06 +0800 Subject: [PATCH 17/68] fix: command auto-completion (#335) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline * remove duplicate code & optimize log * remove duplicate code & optimize log * fix: command auto-completion * fix: command auto-completion * fix: command auto-completion --- init_obdiag_cmd.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index 86676960..dc292fcf 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -21,7 +21,9 @@ _obdiag_completion() { if [ "$COMP_CWORD" -eq 2 ]; then type_list="log flt_trace sql sql_review parameter variable" elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then - type_list="diff non-default" + type_list="diff default" + elif [ "${COMP_WORDS[2]}" = "variable" ] && [ "$COMP_CWORD" -eq 3 ]; then + type_list="diff" fi ;; rca) @@ -38,7 +40,10 @@ _obdiag_completion() { type_list="list run" COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "parameter" ]; then - type_list="diff non-default" + type_list="diff default" + COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) + elif [ "${COMP_WORDS[1]}" = "analyze" ] && [ "${COMP_WORDS[2]}" = "variable" ]; then + type_list="diff" + COMPREPLY=($(compgen -W "${type_list}" -- "${cur_word}")) fi ;;
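
The completion word lists above have to stay in sync with the subcommands that diag_cmd.py registers and that core.py dispatches on. As a hedged sketch of the dispatch those words mirror (core.py actually uses an elif chain; the handler classes, module paths, and error message are taken from the diffs in this series, while the table-driven shape is only an illustration):

    from handler.analyzer.analyze_parameter import AnalyzeParameterHandler
    from handler.analyzer.analyze_variable import AnalyzeVariableHandler

    # function_type -> handler factory; mirrors the completion words:
    # analyze parameter {diff,default} and analyze variable diff
    HANDLERS = {
        'analyze_parameter_default': lambda ctx: AnalyzeParameterHandler(ctx, 'default'),
        'analyze_parameter_diff': lambda ctx: AnalyzeParameterHandler(ctx, 'diff'),
        'analyze_variable_diff': lambda ctx: AnalyzeVariableHandler(ctx, 'diff'),
    }

    def dispatch(function_type, context):
        factory = HANDLERS.get(function_type)
        if factory is None:
            raise ValueError('Not support analyze function: {0}'.format(function_type))
        factory(context).handle()
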
From cc77beef14165845ce4d9c94238a9cc701828039 Mon Sep 17 00:00:00 2001 From: sunpeng <35529415+oraclebird@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:24:06 +0800 Subject: [PATCH 18/68] fix: obdiag parameter and variable collection and comparison (#336) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * add a header row to the CSV output * obdiag gather and analyze parameters/variables * add file validity checks * add file validity checks * reword the parameter range description * fix duplicate items and descriptions * fix the display of the collection time in variable analysis --- handler/analyzer/analyze_parameter.py | 4 ++++ handler/analyzer/analyze_variable.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index a31ebd38..8dfa344c 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -180,6 +180,8 @@ def analyze_parameter_default(self): with open(self.parameter_file_name, 'r', newline='') as file: reader = csv.reader(file) for row in reader: + if row[0] == 'VERSION': + continue key = str(row[1]) + '-' + str(row[2]) + '-' + str(row[3]) + '-' + str(row[4]) + '-' + str(row[5]) + '-' + str(row[6]) value = row[7] file_parameter_dict[key] = value @@ -211,6 +213,8 @@ def alalyze_parameter_diff(self): with open(self.parameter_file_name, 'r', newline='') as file: reader = csv.reader(file) for row in reader: + if row[0] == 'VERSION': + continue parameter_info.append(row) tenants_dict = dict() for row in parameter_info: diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index 7eafccb9..ca07ede4 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -118,6 +118,8 @@ def analyze_variable(self): with open(self.variable_file_name, 'r', newline='') as file: reader = csv.reader(file) for row in reader: + if row[0] == 'VERSION': + continue key = str(row[1]) + '-' + str(row[3]) file_variable_dict[key] = str(row[5]) if not last_gather_time:
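
Patch 14 started writing a header row (beginning with VERSION) into the gathered parameter and variable CSV files, so the hunks above make the analyzers skip that row when reading the files back. The same guard in isolation, as a runnable sketch (the file path and helper name are invented for the example):

    import csv

    def read_gathered_rows(path):
        rows = []
        with open(path, 'r', newline='') as f:
            for row in csv.reader(f):
                # Skip the header row written at gather time.
                if row and row[0] == 'VERSION':
                    continue
                rows.append(row)
        return rows
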
From 21e94e2024368b2630b6458bee5e38b4f0c721e4 Mon Sep 17 00:00:00 2001 From: sunpeng <35529415+oraclebird@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:38:17 +0800 Subject: [PATCH 19/68] fix: obdiag parameter and variable collection and comparison (#337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * obdiag gather and analyze parameters/variables * add a header row to the CSV output * obdiag gather and analyze parameters/variables * add file validity checks * add file validity checks * reword the parameter range description * fix duplicate items and descriptions * fix the display of the collection time in variable analysis * fix a printing bug --- handler/analyzer/analyze_parameter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 8dfa344c..0cd4e58b 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -162,7 +162,7 @@ def analyze_parameter_default(self): report_default_tb.add_row([row[1], row[2], row[3], row[4], tenant_id, row[6], row[11], row[7]]) fp.write(report_default_tb.get_string() + "\n") self.stdio.print(report_default_tb.get_string()) - self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0}' ".format(file_name) + Style.RESET_ALL) else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") From e5a9f0048df666809a7f30609b124bf798b9412a Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:13:41 +0800 Subject: [PATCH 20/68] fix analyze_parameter/analyze_variable log (#338) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline * remove duplicate code & optimize log * remove duplicate code & optimize log * fix: command auto-completion * fix: command auto-completion * fix: command auto-completion * fix analyze_parameter/analyze_variable log --- handler/analyzer/analyze_parameter.py | 2 +- handler/analyzer/analyze_variable.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 0cd4e58b..bb2d8117 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -135,7 +135,7 @@ def init_option_diff(self): if offline_file_option: if not os.path.exists(os.path.abspath(offline_file_option)): self.stdio.error('args --file [{0}] not exist: No such file, Please specify it again'.format(os.path.abspath(offline_file_option))) - exit(-1) + return False else: self.parameter_file_name = os.path.abspath(offline_file_option) self.check_file_valid() return True diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index ca07ede4..fd2255dc 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -83,13 +83,13 @@ def init_option(self): if offline_file_option: if not os.path.exists(os.path.abspath(offline_file_option)): self.stdio.error('args --file [{0}] not exist: No such file, Please specify it again'.format(os.path.abspath(offline_file_option))) - exit(-1) + return False else: self.variable_file_name = os.path.abspath(offline_file_option) self.check_file_valid() else: self.stdio.error("args --file need provided to find the parts where variables have changed.") - exit(-1) + return False if store_dir_option and store_dir_option != "./": if not os.path.exists(os.path.abspath(store_dir_option)): From b54e3a3af7621b14c0568411ceeb681b961de5ac Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:46:37 +0800 Subject: [PATCH 21/68] fix gather_obproxy_log.py (#341) --- handler/gather/gather_obproxy_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index 9f7b60fd..ee32c8b7 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -274,7 +274,7 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): grep_cmd = "grep -e '{0}' ".format(grep_arg) + "{log_dir}/{log_name}".format(log_name=log_name, log_dir=log_path) continue grep_cmd += "| grep -e '{0}'".format(grep_arg) - grep_cmd += " >> {log_dir}/{log_name}".format(log_name=log_name, log_dir=log_path) + grep_cmd += " >> {gather_path}/{log_name}".format(log_name=log_name, gather_path=gather_path) self.stdio.verbose("grep files, run cmd = [{0}]".format(grep_cmd)) ssh_client.exec_cmd(grep_cmd) else:
From 2b2360905ba97ff0a929be3d47302cca211253b0 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 17 Jul 2024 19:11:23 +0800 Subject: [PATCH 22/68] fix gather all (#344) * fix gather_obproxy_log.py * fix sshclient upload * build test package * delete test package --- common/ssh_client/local_client.py | 2 +- common/ssh_client/remote_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/ssh_client/local_client.py b/common/ssh_client/local_client.py index 52ee2ca9..b7d849d6 100644 --- a/common/ssh_client/local_client.py +++ b/common/ssh_client/local_client.py @@ -48,7 +48,7 @@ def upload(self, remote_path, local_path): try: shutil.copy(local_path, remote_path) except Exception as e: - self.stdio.error("upload file to localhost, remote _path =[{0}], local _path=[{1}], error=[{2}]".format(remote_path, local_path, str(e))) + self.stdio.error("upload file to localhost, remote_path =[{0}], local_path=[{1}], error=[{2}]".format(remote_path, local_path, str(e))) raise Exception("[local] upload file to localhost, remote _path =[{0}], local _path=[{1}], error=[{2}]".format(remote_path, local_path, str(e))) def ssh_invoke_shell_switch_user(self, new_user, cmd, time_out): diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py index 6bcfdaf6..b128be0e 100644 --- a/common/ssh_client/remote_client.py +++ b/common/ssh_client/remote_client.py @@ -105,7 +105,7 @@ def progress_bar(self, transferred, to_be_transferred, suffix=''): def upload(self, remote_path, local_path): transport = self._ssh_fd.get_transport() self._sftp_client = paramiko.SFTPClient.from_transport(transport) - self._sftp_client.put(remote_path, local_path) + self._sftp_client.put(local_path, remote_path) self._sftp_client.close() def ssh_invoke_shell_switch_user(self, new_user, cmd, time_out):
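
The one-line change to remote_client.py matters because paramiko's SFTPClient.put takes the local path first and the remote path second, so the old call tried to transfer in the wrong direction. A minimal sketch of the corrected call order (host, credentials, and paths are placeholders):

    import paramiko

    def upload(host, username, password, local_path, remote_path):
        # AutoAddPolicy blindly trusts unknown host keys; acceptable for a sketch only.
        with paramiko.SSHClient() as ssh:
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(host, username=username, password=password)
            sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
            # put(localpath, remotepath): source on this machine, destination on the server
            sftp.put(local_path, remote_path)
            sftp.close()
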
From 71283b69699f61bb8af2032045d4ef12dee9bcf4 Mon Sep 17 00:00:00 2001 From: jingyd66 <82036232+jingyd66@users.noreply.github.com> Date: Wed, 17 Jul 2024 19:26:01 +0800 Subject: [PATCH 23/68] update0717 (#343) --- handler/rca/scene/index_ddl_error_scene.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/handler/rca/scene/index_ddl_error_scene.py b/handler/rca/scene/index_ddl_error_scene.py index 0d28ff5a..e9ffc39e 100644 --- a/handler/rca/scene/index_ddl_error_scene.py +++ b/handler/rca/scene/index_ddl_error_scene.py @@ -136,6 +136,9 @@ def execute(self): self.verbose("gather rootservice.log by {0}".format(self.trace_id)) work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) self.gather_log.set_parameters("scope", "rootservice") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_rs) if logs_name is None or len(logs_name) <= 0: @@ -161,6 +164,9 @@ def execute(self): self.verbose("gather observer.log by {0}".format(self.trace_id)) work_path_ddl_wait_trans_end_ctx_try_wait = self.store_dir + "/{0}_on_obs/".format(self.trace_id) self.gather_log.set_parameters("scope", "observer") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_ddl_wait_trans_end_ctx_try_wait) if logs_name is None or len(logs_name) <= 0: @@ -176,6 +182,9 @@ def execute(self): # index_sstable_build_task_finish work_path_index_sstable_build_task_finish = self.store_dir + "/{0}_on_obs/".format(self.trace_id) self.gather_log.set_parameters("scope", "observer") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_index_sstable_build_task_finish) if logs_name is None or len(logs_name) <= 0: @@ -191,6 +200,9 @@ def execute(self): self.verbose("gather rootservice.log by {0}".format(self.trace_id)) work_path_rs = self.store_dir + "/{0}_on_rs/".format(self.trace_id) self.gather_log.set_parameters("scope", "rootservice") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) self.gather_log.grep("{0}".format(self.trace_id)) logs_name = self.gather_log.execute(save_path=work_path_rs) if logs_name is None or len(logs_name) <= 0: From e563a6986fa2d8be3d3e1e805abb4bbfa9366eee Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 17 Jul 2024 20:28:41 +0800 Subject: [PATCH 24/68] add gather scene sql (#345) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline * remove duplicate code & optimize log * remove duplicate code & optimize log * fix: command auto-completion * fix: command auto-completion * fix: command auto-completion * fix analyze_parameter/analyze_variable log * add gathering scenario sql --- handler/gather/tasks/observer/clog_disk_full.yaml | 9 +++++++++ handler/gather/tasks/observer/compaction.yaml | 9 +++++++++ .../tasks/observer/delay_of_primary_and_backup.yaml | 6 ++++++ handler/gather/tasks/observer/log_archive.yaml | 3 +++ handler/gather/tasks/observer/long_transaction.yaml | 3 +++ handler/gather/tasks/observer/memory.yaml | 5 ++++- handler/gather/tasks/observer/recovery.yaml | 3 +++ handler/gather/tasks/observer/rootservice_switch.yaml | 3 +++ handler/gather/tasks/observer/suspend_transaction.yaml | 3 +++ handler/gather/tasks/observer/unit_data_imbalance.yaml | 5 ++++- handler/gather/tasks/observer/unknown.yaml | 9 +++++++++ 11 files changed, 56 insertions(+), 2 deletions(-) diff --git a/handler/gather/tasks/observer/clog_disk_full.yaml b/handler/gather/tasks/observer/clog_disk_full.yaml index ecedeab0..3131b991 100644 --- a/handler/gather/tasks/observer/clog_disk_full.yaml +++ b/handler/gather/tasks/observer/clog_disk_full.yaml @@ -105,6 +105,15 @@ task: - type: sql sql: "select * from oceanbase.DBA_OB_SERVER_EVENT_HISTORY where event like '%migrat%' and name6 like '%fail%' and value6=1;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql + sql: "select tenant_name, svr_ip,
memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: ssh ssh: "df -h" global: false diff --git a/handler/gather/tasks/observer/compaction.yaml b/handler/gather/tasks/observer/compaction.yaml index 87f9dbbf..f59fcbcb 100644 --- a/handler/gather/tasks/observer/compaction.yaml +++ b/handler/gather/tasks/observer/compaction.yaml @@ -150,6 +150,15 @@ task: - type: sql sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml index 70a52e65..2219d22b 100644 --- a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml +++ b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml @@ -156,6 +156,12 @@ task: - type: sql sql: "SELECT LS_ID, SCN_TO_TIMESTAMP(END_SCN) FROM oceanbase.GV$OB_LOG_STAT WHERE ROLE = 'LEADER';" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b 
where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/log_archive.yaml b/handler/gather/tasks/observer/log_archive.yaml index 2d2908f0..c6d7c9ae 100644 --- a/handler/gather/tasks/observer/log_archive.yaml +++ b/handler/gather/tasks/observer/log_archive.yaml @@ -120,6 +120,9 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_ARCHIVELOG_PIECE_FILES limit 20" global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log global: false grep: "" diff --git a/handler/gather/tasks/observer/long_transaction.yaml b/handler/gather/tasks/observer/long_transaction.yaml index cbd4c9a4..c59a2de8 100644 --- a/handler/gather/tasks/observer/long_transaction.yaml +++ b/handler/gather/tasks/observer/long_transaction.yaml @@ -84,6 +84,9 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND STATE = 'INIT';" global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/memory.yaml b/handler/gather/tasks/observer/memory.yaml index f2650f19..cd96d560 100644 --- a/handler/gather/tasks/observer/memory.yaml +++ b/handler/gather/tasks/observer/memory.yaml @@ -94,7 +94,10 @@ task: sql: "show parameters like '%syslog_io_bandwidth_limit%';" global: true - type: sql - sql: "select * from oceanbase.GV$OB_MEMSTORE limit 20" + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" global: true - type: ssh # 
可看到租户的规格、线程、队列及请求统计等信息,且这条日志每个租户每 30s 打印一次 ssh: "grep 'dump tenant info.tenant=' ${observer_data_dir}/log/observer.log | sed 's/,/,/g'" diff --git a/handler/gather/tasks/observer/recovery.yaml b/handler/gather/tasks/observer/recovery.yaml index 1d858159..10ef5441 100644 --- a/handler/gather/tasks/observer/recovery.yaml +++ b/handler/gather/tasks/observer/recovery.yaml @@ -89,6 +89,9 @@ task: - type: sql sql: "SELECT * FROM oceanbase.CDB_OB_RESTORE_HISTORY limit 20;" global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/rootservice_switch.yaml b/handler/gather/tasks/observer/rootservice_switch.yaml index be72be4e..c0ea7f71 100644 --- a/handler/gather/tasks/observer/rootservice_switch.yaml +++ b/handler/gather/tasks/observer/rootservice_switch.yaml @@ -132,6 +132,9 @@ task: - type: sql sql: "SELECT TENANT_NAME, TENANT_ID, TENANT_ROLE, STATUS, SWITCHOVER_STATUS FROM oceanbase.DBA_OB_TENANTS" global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/suspend_transaction.yaml b/handler/gather/tasks/observer/suspend_transaction.yaml index 59001c27..5f2d45cb 100644 --- a/handler/gather/tasks/observer/suspend_transaction.yaml +++ b/handler/gather/tasks/observer/suspend_transaction.yaml @@ -81,6 +81,9 @@ task: - type: sql sql: "SELECT count(1) FROM oceanbase.GV$OB_TRANSACTION_PARTICIPANTS WHERE CTX_CREATE_TIME < date_sub(now(), INTERVAL 600 SECOND) AND (STATE = 'PREPARE' OR STATE = 'REDO COMPLETE' OR STATE ='PRECOMMIT');" global: true + - type: sql + sql: "select tenant_name, svr_ip, memstore_limit /(1024 * 1024 * 1024) as memstore_limit_GB, freeze_trigger /(1024 * 1024 * 1024) as freeze_trigger_GB, memstore_used /(1024 * 1024 * 1024) as memstore_used_GB, concat((memstore_used * 100 / memstore_limit), '%') as memstore_used_percent, active_span /(1024 * 1024 * 1024) as active_span_GB, freeze_cnt from oceanbase.GV$OB_MEMSTORE memstore_info inner join oceanbase.DBA_OB_TENANTS tenant on memstore_info.tenant_id = tenant.tenant_id ORDER BY tenant.tenant_name,svr_ip;" + global: true - type: log grep: "" global: false diff --git a/handler/gather/tasks/observer/unit_data_imbalance.yaml b/handler/gather/tasks/observer/unit_data_imbalance.yaml index b9e7b54e..a81f5a10 100644 --- a/handler/gather/tasks/observer/unit_data_imbalance.yaml +++ b/handler/gather/tasks/observer/unit_data_imbalance.yaml @@ -154,7 +154,10 @@ task: sql: "select t.tenant_name, t1.database_name, round(sum(t2.data_size)/1024/1024/1024,2) as data_size_gb, round(sum(t2.required_size)/1024/1024/1024,2) as required_size_gb from oceanbase.dba_ob_tenants t, oceanbase.cdb_ob_table_locations t1, oceanbase.cdb_ob_tablet_replicas t2 where 
t.tenant_id=t1.tenant_id and t1.svr_ip=t2.svr_ip and t1.tenant_id=t2.tenant_id and t1.ls_id=t2.ls_id and t1.tablet_id=t2.tablet_id and t1.role='leader' group by t.tenant_name, t1.database_name order by data_size_gb desc;" global: true - type: sql - sql: "select svr_ip,total_size / 1024 / 1024 / 1024 total_G,free_size / 1024 / 1024 / 1024 free_G,(total_size - free_size) / 1024 / 1024 / 1024 used_G,(total_size - free_size) / total_size used_percentage FROM oceanbase.__all_virtual_disk_stat; " + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" global: true - type: log grep: "" diff --git a/handler/gather/tasks/observer/unknown.yaml b/handler/gather/tasks/observer/unknown.yaml index 3d22f9af..ad2978b0 100644 --- a/handler/gather/tasks/observer/unknown.yaml +++ b/handler/gather/tasks/observer/unknown.yaml @@ -69,6 +69,15 @@ task: - type: sql sql: "SELECT a.TENANT_NAME,a.TENANT_ID,b.SVR_IP FROM oceanbase.DBA_OB_TENANTS a, oceanbase.GV$OB_UNITS b WHERE a.TENANT_ID=b.TENANT_ID;" global: true + - type: sql + sql: "select b.zone, a.svr_ip,a.svr_port,b.with_rootserver, round(a.total_size/1024/1024/1024,0) as total_GB,round(a.free_size/1024/1024/1024,0) as free_GB, round((a.total_size-a.free_size)/1024/1024/1024,0) as used_GB,(a.total_size-a.free_size)/a.total_size as used_percent from oceanbase.__all_virtual_disk_stat a, oceanbase.DBA_OB_SERVERS b where a.svr_ip=b.svr_ip and a.svr_port=b.svr_port order by b.zone,used_percent desc;" + global: true + - type: sql # 大于1g的内存模块 + sql: "SELECT CASE WHEN tenant_name IS NULL THEN TENANT_ID ELSE tenant_name END tenant_name, host,ctx_name, mod_name, hold, used, count FROM ( SELECT tenant_name,tenant_id,HOST,ctx_name,mod_name,hold,used,COUNT, ROW_NUMBER () OVER ( PARTITION BY tenant_name, HOST ORDER BY hold desc) rnum FROM (SELECT b.tenant_name, a.tenant_id, concat(a.svr_ip, ':', a.svr_port) HOST, a.ctx_name, a.mod_name, round(a.hold / 1024 / 1024 / 1024) hold, round(a.used / 1024 / 1024 / 1024) used, a.COUNT FROM oceanbase.__all_virtual_memory_info a LEFT JOIN oceanbase.__all_tenant b ON a.TENANT_ID = b.TENANT_ID WHERE a.hold > 1024 * 1024 * 1024 ));" + global: true + - type: sql # 查看每台机器上的tablet分布 + sql: "select svr_ip, svr_port, ls_id, count(*) from oceanbase.CDB_OB_TABLE_LOCATIONS group by svr_ip, svr_port, ls_id;" + global: true - type: sql sql: "show parameters like '%syslog_level%';" global: true From 8b1b275e0824a1a6e46763f836b3a2cfbdf16113 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 17 Jul 2024 20:33:11 +0800 Subject: [PATCH 25/68] fix tabledump and analyze parameter (#340) * table dump print pretty result * Fix regression testing bugs * Fix regression testing bugs * Optimize logs * Optimize logs * Optimize logs * fix: gather tabledump * fix: gather tabledump * fix analyze flt_trace offline * remove duplicate code & optimize log * remove duplicate code & optimize log * fix: command auto-completion * fix: command auto-completion * fix: command auto-completion 
* fix analyze_parameter/analyze_variable log * add gathering scenario sql * fix tabledump and analyze parameter * fix tabledump and analyze parameter --- handler/analyzer/analyze_parameter.py | 2 +- handler/gather/gather_tabledump.py | 38 ++++++++++++++++----------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index bb2d8117..98f094a8 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -162,7 +162,7 @@ def analyze_parameter_default(self): report_default_tb.add_row([row[1], row[2], row[3], row[4], tenant_id, row[6], row[11], row[7]]) fp.write(report_default_tb.get_string() + "\n") self.stdio.print(report_default_tb.get_string()) - self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0}' ".format(file_name) + Style.RESET_ALL) + self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 3b4484f2..2a790e1f 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -62,8 +62,8 @@ def init(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - if not (self.database and self.database and user and password): - self.stdio.error("option --database/--table/--user/--password not found, please provide") + if not (self.database and self.table and user): + self.stdio.error("option --database/--table/--user not found, please provide") return False store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None and store_dir_option != './': @@ -100,27 +100,31 @@ def handle(self): if not self.init(): self.stdio.error('init failed') return False - self.execute() - if not self.is_innner: + excute_status = self.execute() + if not self.is_innner and excute_status: self.__print_result() def execute(self): try: self.version = get_observer_version(self.context) - self.__get_table_schema() - if self.version == "4.0.0.0" or StringUtils.compare_versions_greater(self.version, "4.0.0.0"): - self.__get_table_info() - else: - self.__get_table_info_v3() + if self.__get_table_schema(): + if self.version == "4.0.0.0" or StringUtils.compare_versions_greater(self.version, "4.0.0.0"): + return self.__get_table_info() + else: + return self.__get_table_info_v3() except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: {1}".format(self.file_name, e)) + self.stdio.error("report sql result failed, error: {0}".format(e)) def __get_table_schema(self): - sql = "show create table " + self.database + "." + self.table - columns, result = self.tenant_connector.execute_sql_return_columns_and_data(sql) - if result is None or len(result) == 0: - self.stdio.verbose("excute sql: {0}, result is None".format(sql)) - self.__report(sql, columns, result) + try: + sql = "show create table " + self.database + "." 
+ self.table + columns, result = self.tenant_connector.execute_sql_return_columns_and_data(sql) + if result is None or len(result) == 0: + self.stdio.verbose("excute sql: {0}, result is None".format(sql)) + self.__report(sql, columns, result) + return True + except Exception as e: + self.stdio.error("show create table error {0}".format(e)) def __get_table_info(self): try: @@ -172,6 +176,7 @@ def __get_table_info(self): return self.stdio.print("data size {0}".format(result)) self.__report(query_data, columns, result) + return True except Exception as e: self.stdio.error("getTableInfo execute Exception: {0}".format(e).strip()) @@ -203,6 +208,7 @@ def __get_table_info_v3(self): return self.stdio.print("table count {0}".format(result)) self.__report(query_count, columns, result) + return True except Exception as e: self.stdio.error("getTableInfo execute Exception: {0}".format(e).strip()) @@ -215,7 +221,7 @@ def __report(self, sql, column_names, data): f.write('\n\n' + 'obclient > ' + sql + '\n') f.write(formatted_table) except Exception as e: - self.stdio.error("report sql result to file: {0} failed, error: ".format(self.file_name)) + self.stdio.error("report sql result to file: {0} failed, error:{1} ".format(self.file_name, e)) def __extract_string(self, s): if '@' in s: From 72dc2590c6cd370eaf6aca5a8d98687535f06432 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Thu, 18 Jul 2024 21:00:48 +0800 Subject: [PATCH 26/68] fix: gather plan_monitor table info (#346) * fix * fix * fix --- .github/workflows/build_package.yml | 2 +- handler/gather/gather_plan_monitor.py | 4 ++-- handler/gather/gather_tabledump.py | 6 +++--- requirements3.txt | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml index f2079d4e..6287fa6e 100644 --- a/.github/workflows/build_package.yml +++ b/.github/workflows/build_package.yml @@ -39,8 +39,8 @@ jobs: sed -i 's/pip install -r requirements3.txt/curl https:\/\/bootstrap.pypa.io\/get-pip.py -o get-pip.py\n\ python3 get-pip.py\n\ pip3 install -r requirements3.txt/' ./rpm/oceanbase-diagnostic-tool.spec + python3 -m pip install --upgrade pip wheel cat ./rpm/oceanbase-diagnostic-tool.spec - python3 -m pip install --upgrade pip setuptools wheel python3 --version rpmbuild -bb ./rpm/oceanbase-diagnostic-tool.spec diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 38683d8e..8159c634 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -286,8 +286,8 @@ def report_schema(self, sql, tenant_name): handler.handle() except Exception as e: pass - table_info_file = os.path.join(self.local_stored_path, "obdiag_tabledump_result_{0}.txt".format(self.gather_timestamp)) - self.stdio.print("table info file path:{0}".format(table_info_file)) + table_info_file = os.path.join(self.local_stored_path, "obdiag_tabledump_result_{0}.txt".format(TimeUtils.timestamp_to_filename_time(self.gather_timestamp))) + self.stdio.verbose("table info file path:{0}".format(table_info_file)) table_info = self.get_table_info(table_info_file) if table_info: schemas = schemas + "
%s
" % table_info diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 2a790e1f..9d73523d 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -62,9 +62,6 @@ def init(self): self.table = Util.get_option(options, 'table') user = Util.get_option(options, 'user') password = Util.get_option(options, 'password') - if not (self.database and self.table and user): - self.stdio.error("option --database/--table/--user not found, please provide") - return False store_dir_option = Util.get_option(options, 'store_dir') if store_dir_option is not None and store_dir_option != './': if not os.path.exists(os.path.abspath(store_dir_option)): @@ -81,6 +78,9 @@ def init(self): password = self.context.get_variable("gather_password") if self.context.get_variable("store_dir", None): self.store_dir = self.context.get_variable("store_dir") + if not (self.database and self.table and user): + self.stdio.error("option --database/--table/--user not found, please provide") + return False if self.context.get_variable("gather_tenant_name", None): self.tenant_name = self.context.get_variable("gather_tenant_name") else: diff --git a/requirements3.txt b/requirements3.txt index 2b4c46b6..96476675 100644 --- a/requirements3.txt +++ b/requirements3.txt @@ -38,3 +38,4 @@ sqlgpt-parser>=0.0.1a5 netifaces==0.11.0 netifaces==0.11.0 kubernetes==30.1.0 +setuptools==70.3.0 From 82f2dbcc96b1e932603a68a919cdd78514618880 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Fri, 19 Jul 2024 15:44:04 +0800 Subject: [PATCH 27/68] fix sql_review log (#347) * fix * fix * fix * fix sql_review log --- handler/analyzer/analyze_sql_review.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/handler/analyzer/analyze_sql_review.py b/handler/analyzer/analyze_sql_review.py index c4253705..f2c8494d 100644 --- a/handler/analyzer/analyze_sql_review.py +++ b/handler/analyzer/analyze_sql_review.py @@ -80,6 +80,9 @@ def init_option(self): if files_option: self.directly_analyze_files = True self.analyze_files_list = files_option + else: + self.stdio.error("option --file not found, please provide") + return False db_user_option = Util.get_option(options, 'user') db_password_option = Util.get_option(options, 'password') tenant_name_option = Util.get_option(options, 'tenant_name') @@ -115,8 +118,10 @@ def handle(self): return False self.init_db_connector() self.local_store_path = os.path.join(self.local_stored_parrent_path, "obdiag_sql_review_result_{0}.html".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) - self.stdio.print("use {0} as result store path.".format(self.local_store_path)) + self.stdio.verbose("use {0} as result store path.".format(self.local_store_path)) all_results = self.__directly_analyze_files() + if all_results is None: + return results = self.__parse_results(all_results) if self.output_type == "html": html_result = self.__generate_html_result(results) @@ -128,7 +133,7 @@ def handle(self): def __directly_analyze_files(self): sql_files = self.__get_sql_file_list() if len(sql_files) == 0: - self.stdio.warn("failed to find SQL files from the --files option provided") + self.stdio.error("failed to find SQL files from the --files option provided") return None file_results = {} sql_results = {} From 14adaf10a855c79eb2aa7e80d1ec853db1609d2a Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:31:43 +0800 Subject: 
[PATCH 28/68] fix build package (#349) fix build package --- requirements3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements3.txt b/requirements3.txt index 96476675..e80cc1dc 100644 --- a/requirements3.txt +++ b/requirements3.txt @@ -38,4 +38,4 @@ sqlgpt-parser>=0.0.1a5 netifaces==0.11.0 netifaces==0.11.0 kubernetes==30.1.0 -setuptools==70.3.0 +setuptools==65.6.3 From abe2afe6ad52a9b98c4f7dda8351867120c0bc5e Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 22 Jul 2024 15:22:38 +0800 Subject: [PATCH 29/68] fix gather table dump (#352) fix gather table dump --- handler/gather/gather_tabledump.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index 9d73523d..afecfd24 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -117,6 +117,7 @@ def execute(self): def __get_table_schema(self): try: + self.table = self.__extract_table_name(self.table) sql = "show create table " + self.database + "." + self.table columns, result = self.tenant_connector.execute_sql_return_columns_and_data(sql) if result is None or len(result) == 0: @@ -237,6 +238,13 @@ def __extract_string(self, s): else: return s + def __extract_table_name(self, full_name): + parts = full_name.split('.') + if len(parts) > 1: + return parts[-1] + else: + return full_name + def __print_result(self): self.end_time = time.time() elapsed_time = self.end_time - self.start_time From 53eb8e1827b17234f1cc3b92f92f0b04a8275f79 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:44:36 +0800 Subject: [PATCH 30/68] fix init.sh: remove sh (#355) fix init.sh: remove sh --- rpm/oceanbase-diagnostic-tool.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec index bd08d8dc..0cb38620 100644 --- a/rpm/oceanbase-diagnostic-tool.spec +++ b/rpm/oceanbase-diagnostic-tool.spec @@ -80,6 +80,6 @@ find /usr/local/oceanbase-diagnostic-tool/obdiag -type f -exec chmod 644 {} \; ln -sf /usr/local/oceanbase-diagnostic-tool/obdiag /usr/bin/obdiag chmod +x /usr/local/oceanbase-diagnostic-tool/obdiag cp -rf /usr/local/oceanbase-diagnostic-tool/init_obdiag_cmd.sh /etc/profile.d/obdiag.sh -sh /usr/local/oceanbase-diagnostic-tool/init.sh +/usr/local/oceanbase-diagnostic-tool/init.sh echo -e 'Please execute the following command to init obdiag:\n' echo -e '\033[32m source /usr/local/oceanbase-diagnostic-tool/init.sh \n \033[0m' From de372c011e776f0550d3eb06564465c4203b7c20 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:51:45 +0800 Subject: [PATCH 31/68] 2.3.0 update rca clog_disk_full_scene (#354) * clog update * build * update * update * update * update rca clog_disk_full_scene * del SsherClient SafeStdio super init func --- common/ssh_client/base.py | 1 - handler/rca/scene/clog_disk_full_scene.py | 38 +++++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/common/ssh_client/base.py b/common/ssh_client/base.py index 4ac6deed..870f73b5 100644 --- a/common/ssh_client/base.py +++ b/common/ssh_client/base.py @@ -22,7 +22,6 @@ class SsherClient(SafeStdio): def __init__(self, context, node): - super().__init__() self.context = context if context is not None: self.stdio = self.context.stdio diff --git 
a/handler/rca/scene/clog_disk_full_scene.py b/handler/rca/scene/clog_disk_full_scene.py index be45fa15..679e9f04 100644 --- a/handler/rca/scene/clog_disk_full_scene.py +++ b/handler/rca/scene/clog_disk_full_scene.py @@ -137,6 +137,7 @@ def __init__(self, context, tenant_id, ls_id, work_path, stdio, record=None): os.makedirs(work_path) self.stdio.verbose("work_path is {0}".format(self.work_path)) self.stdio = stdio + self.input_parameters = context.get_variable("input_parameters") or {} def execute(self): try: @@ -151,6 +152,9 @@ def execute(self): self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("{0}".format(self.ls_id)) self.gather_log.grep("clog checkpoint no change") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_checkpoint) if logs_name is None or len(logs_name) <= 0: self.record.add_record("no log_disk_full about checkpoint") @@ -192,6 +196,9 @@ def execute(self): self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("{0}".format(self.ls_id)) self.gather_log.grep("ObLSTxService::get_rec_scn") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_get_min_ckpt_type) check_min_ckpt_type = False for log_name in logs_name: @@ -222,6 +229,9 @@ def execute(self): self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("{0}".format(self.ls_id)) self.gather_log.grep("get_min_unreplayed_log_info") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_check_replay_stack) check_replay_stuck = False for log_name in logs_name: @@ -232,12 +242,18 @@ def execute(self): for line in lines: if check_replay_stuck: break - if "get_min_unreplayed_log_info" in line and self.get_stuck_mod(line).get('role_') is not None: - self.record.add_record("get min unreplayed log info is {0}".format(line)) + if "get_min_unreplayed_log_info" in line and self.get_stuck_modV2(line).get('role_') is not None: + replay_scn = self.parse_replay_scn(line) replay_scn_time = datetime.datetime.fromtimestamp(float(replay_scn) / 1000000000) log_time = self.parse_log_time(line) check_replay_stuck = log_time - replay_scn_time > datetime.timedelta(minutes=0.5) + if check_replay_stuck: + self.record.add_record("check_replay_stuck is True. 
the line: {0}".format(line)) + self.record.add_record("get min unreplayed log info is {0}".format(line)) + self.record.add_record("log_time - replay_scn_time : {0} - {1}".format(log_time, replay_scn_time)) + self.record.add_record("datetime.timedelta(minutes=0.5): {0}".format(datetime.timedelta(minutes=0.5))) + self.record.add_record("log_time - replay_scn_time > datetime.timedelta(minutes=0.5) is {0}".format(check_replay_stuck)) break self.record.add_record("check_replay_stuck is {0}".format(check_replay_stuck)) if check_replay_stuck: @@ -253,6 +269,9 @@ def execute(self): self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("log_frozen_memstore_info_if_need_") self.gather_log.grep("[TenantFreezer] oldest frozen memtable") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_check_dump_stuck) check_dump_stuck = False for log_name in logs_name: @@ -287,6 +306,9 @@ def execute(self): self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("Server out of disk space") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_check_data_disk_full) for log_name in logs_name: if check_data_disk_full: @@ -309,6 +331,9 @@ def execute(self): self.gather_log.set_parameters("scope", "observer") self.gather_log.grep("{0}".format(self.tenant_id)) self.gather_log.grep("Too many sstables in tablet, cannot schdule mini compaction, retry later") + if self.input_parameters.get("since") is not None: + since = self.input_parameters.get("since") + self.gather_log.set_parameters("since", since) logs_name = self.gather_log.execute(save_path=work_path_check_too_many_sstable) for log_name in logs_name: if check_too_many_sstable: @@ -339,6 +364,15 @@ def get_stuck_mod(self, line): d[i.group('key')] = i.group('value') return d + def get_stuck_modV2(self, line): + d = dict() + # service_type="TRANS_SERVICE" + p = '(?P[\w|_]+):(?P\w+)' + m = re.finditer(p, line) + for i in m: + d[i.group('key')] = i.group('value') + return d + def parse_checkpoint_scn(self, line): p = "checkpoint_scn=\{val:(?P\d+)\}," p1 = "checkpoint_scn=\{val:(?P\d+)," From 7014d7e851f4dcbe1e3ab6e5113685c5bce9cdcb Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:02:22 +0800 Subject: [PATCH 32/68] update lock_conflict_scene (#356) * clog update * build * update * update * update * update rca clog_disk_full_scene * del SsherClient SafeStdio super init func * update lock_conflict_scene --- handler/rca/scene/lock_conflict_scene.py | 1 + 1 file changed, 1 insertion(+) diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index c2de51be..b5a1b5d0 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -102,6 +102,7 @@ def __execute_4_2(self): audit_switch_value = cursor_check_switch.fetchone().get("value") if audit_switch_value.strip().upper() == "TRUE": holding_lock_sql_info_cursor = self.ob_connector.execute_sql_return_cursor_dictionary('SELECT * FROM oceanbase.gv$OB_SQL_AUDIT where SID="{0}";'.format(holding_lock_session_id)) + trans_record.add_record('exec sql: SELECT * FROM oceanbase.gv$OB_SQL_AUDIT where SID="{0}"; to 
get holding_lock_sql_info.'.format(holding_lock_session_id)) holding_lock_sql_info = holding_lock_sql_info_cursor.fetchall() if len(holding_lock_sql_info) == 0: trans_record.add_record("holding_lock_session_id: {0}; not find sql_info on gv$OB_SQL_AUDIT".format(holding_lock_session_id)) From 16c2d52d6f03f909dad9b5e596b9e93593dd3563 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 22 Jul 2024 20:53:49 +0800 Subject: [PATCH 33/68] Temporarily hide the analyze SQL, to be reopened after resolving the slow ply (#357) Temporarily hide the analyze SQL, to be reopened after resolving the slow ply (#357) --- diag_cmd.py | 4 ++-- init_obdiag_cmd.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/diag_cmd.py b/diag_cmd.py index e6f18c84..394e931d 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -922,8 +922,8 @@ def __init__(self): self.register_command(ObdiagAnalyzeFltTraceCommand()) self.register_command(ObdiagAnalyzeParameterCommand()) self.register_command(ObdiagAnalyzeVariableCommand()) - self.register_command(ObdiagAnalyzeSQLCommand()) - self.register_command(ObdiagAnalyzeSQLReviewCommand()) + # self.register_command(ObdiagAnalyzeSQLCommand()) + # self.register_command(ObdiagAnalyzeSQLReviewCommand()) class ObdiagRCACommand(MajorCommand): diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh index dc292fcf..dac049c9 100644 --- a/init_obdiag_cmd.sh +++ b/init_obdiag_cmd.sh @@ -19,7 +19,7 @@ _obdiag_completion() { ;; analyze) if [ "$COMP_CWORD" -eq 2 ]; then - type_list="log flt_trace sql sql_review parameter variable" + type_list="log flt_trace parameter variable" elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then type_list="diff default" elif [ "${COMP_WORDS[2]}" = "variable" ] && [ "$COMP_CWORD" -eq 3 ]; then From d0605815edd7b3e223990eac8de86df7733f1423 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:19:43 +0800 Subject: [PATCH 34/68] fix sql plan monitor rows null (#359) fix sql plan monitor rows null --- handler/gather/gather_plan_monitor.py | 18 ++++++++++++------ handler/meta/sql_meta.py | 8 ++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 8159c634..6d248bcf 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -145,7 +145,7 @@ def handle_plan_monitor_from_ob(cluster_name): sql_plan_monitor_detail_v1 = str(sql_plan_monitor_detail_template).replace("##REPLACE_TRACE_ID##", trace_id).replace("##REPLACE_ORDER_BY##", "PLAN_LINE_ID ASC, SVR_IP, SVR_PORT, CHANGE_TS, PROCESS_NAME ASC") sql_plan_monitor_detail_v2 = str(sql_plan_monitor_detail_template).replace("##REPLACE_TRACE_ID##", trace_id).replace("##REPLACE_ORDER_BY##", "PROCESS_NAME ASC, PLAN_LINE_ID ASC, FIRST_REFRESH_TIME ASC") - sql_plan_monitor_dfo_op = self.sql_plan_monitor_dfo_op_sql(tenant_id, plan_id, trace_id) + sql_plan_monitor_dfo_op = self.sql_plan_monitor_dfo_op_sql(tenant_id, plan_id, trace_id, svr_ip, svr_port) full_audit_sql_by_trace_id_sql = self.full_audit_sql_by_trace_id_sql(trace_id) plan_explain_sql = self.plan_explain_sql(tenant_id, plan_id, svr_ip, svr_port) @@ -740,7 +740,7 @@ def full_audit_sql_by_trace_id_sql(self, trace_id): sql = "select /*+ sql_audit */ %s from sys.%s where trace_id = '%s' AND " "length(client_ip) > 4 ORDER BY REQUEST_ID" % (GlobalSqlMeta().get_value(key="sql_audit_item_oracle"), 
self.sql_audit_name, trace_id) return sql - def sql_plan_monitor_dfo_op_sql(self, tenant_id, plan_id, trace_id): + def sql_plan_monitor_dfo_op_sql(self, tenant_id, plan_id, trace_id, svr_ip, svr_port): if self.tenant_mode == 'mysql': if self.ob_major_version >= 4: sql = ( @@ -749,6 +749,8 @@ def sql_plan_monitor_dfo_op_sql(self, tenant_id, plan_id, trace_id): .replace("##REPLACE_PLAN_ID##", str(plan_id)) .replace("##REPLACE_TENANT_ID##", str(tenant_id)) .replace("##REPLACE_PLAN_EXPLAIN_TABLE_NAME##", self.plan_explain_name) + .replace("##REPLACE_SVR_IP##", svr_ip) + .replace("##REPLACE_SVR_PORT##", str(svr_port)) ) else: sql = ( @@ -757,25 +759,29 @@ def sql_plan_monitor_dfo_op_sql(self, tenant_id, plan_id, trace_id): .replace("##REPLACE_PLAN_ID##", str(plan_id)) .replace("##REPLACE_TENANT_ID##", str(tenant_id)) .replace("##REPLACE_PLAN_EXPLAIN_TABLE_NAME##", self.plan_explain_name) + .replace("##REPLACE_SVR_IP##", svr_ip) + .replace("##REPLACE_SVR_PORT##", str(svr_port)) ) else: if self.ob_major_version >= 4: sql = ( - GlobalSqlMeta() - .get_value(key="sql_plan_monitor_dfo_op_oracle_obversion4") + str(GlobalSqlMeta().get_value(key="sql_plan_monitor_dfo_op_oracle_obversion4")) .replace("##REPLACE_TRACE_ID##", trace_id) .replace("##REPLACE_PLAN_ID##", str(plan_id)) .replace("##REPLACE_TENANT_ID##", str(tenant_id)) .replace("##REPLACE_PLAN_EXPLAIN_TABLE_NAME##", self.plan_explain_name) + .replace("##REPLACE_SVR_IP##", svr_ip) + .replace("##REPLACE_SVR_PORT##", str(svr_port)) ) else: sql = ( - GlobalSqlMeta() - .get_value(key="sql_plan_monitor_dfo_op_oracle") + str(GlobalSqlMeta().get_value(key="sql_plan_monitor_dfo_op_oracle")) .replace("##REPLACE_TRACE_ID##", trace_id) .replace("##REPLACE_PLAN_ID##", str(plan_id)) .replace("##REPLACE_TENANT_ID##", str(tenant_id)) .replace("##REPLACE_PLAN_EXPLAIN_TABLE_NAME##", self.plan_explain_name) + .replace("##REPLACE_SVR_IP##", svr_ip) + .replace("##REPLACE_SVR_PORT##", str(svr_port)) ) return sql diff --git a/handler/meta/sql_meta.py b/handler/meta/sql_meta.py index 452c6de7..1168a2b3 100644 --- a/handler/meta/sql_meta.py +++ b/handler/meta/sql_meta.py @@ -242,7 +242,7 @@ def rm_value(self, key): ) plan_monitor LEFT JOIN ( - SELECT "ROWS", PLAN_LINE_ID FROM sys.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## + SELECT "ROWS", PLAN_LINE_ID FROM sys.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## and ip = '##REPLACE_SVR_IP##' and port = ##REPLACE_SVR_PORT## ) plan_explain ON plan_monitor.PLAN_LINE_ID = plan_explain.PLAN_LINE_ID @@ -316,7 +316,7 @@ def rm_value(self, key): ) plan_monitor LEFT JOIN ( - SELECT ROWS, PLAN_LINE_ID FROM oceanbase.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## + SELECT ROWS, PLAN_LINE_ID FROM oceanbase.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## and ip = '##REPLACE_SVR_IP##' and port = ##REPLACE_SVR_PORT## ) plan_explain ON plan_monitor.PLAN_LINE_ID = plan_explain.PLAN_LINE_ID @@ -648,7 +648,7 @@ def rm_value(self, key): ) plan_monitor LEFT JOIN ( - SELECT "ROWS", PLAN_LINE_ID FROM sys.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## + SELECT "ROWS", PLAN_LINE_ID FROM sys.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## and svr_ip = '##REPLACE_SVR_IP##' and 
svr_port = ##REPLACE_SVR_PORT## ) plan_explain ON plan_monitor.PLAN_LINE_ID = plan_explain.PLAN_LINE_ID @@ -726,7 +726,7 @@ def rm_value(self, key): ) plan_monitor LEFT JOIN ( - SELECT ROWS, PLAN_LINE_ID FROM oceanbase.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## + SELECT ROWS, PLAN_LINE_ID FROM oceanbase.##REPLACE_PLAN_EXPLAIN_TABLE_NAME## WHERE plan_id = ##REPLACE_PLAN_ID## AND tenant_id = ##REPLACE_TENANT_ID## and svr_ip = '##REPLACE_SVR_IP##' and svr_port = ##REPLACE_SVR_PORT## ) plan_explain ON plan_monitor.PLAN_LINE_ID = plan_explain.PLAN_LINE_ID From c38f36baefccacf50c64a87822375ae8dfc6b800 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:48:19 +0800 Subject: [PATCH 35/68] 2.3.0 clog update (#360) * clog update * build * update * update * update * update rca clog_disk_full_scene * del SsherClient SafeStdio super init func * update lock_conflict_scene * update clog_disk_full_scene --- handler/rca/scene/clog_disk_full_scene.py | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/handler/rca/scene/clog_disk_full_scene.py b/handler/rca/scene/clog_disk_full_scene.py index 679e9f04..65bb50aa 100644 --- a/handler/rca/scene/clog_disk_full_scene.py +++ b/handler/rca/scene/clog_disk_full_scene.py @@ -94,6 +94,7 @@ def execute(self): self.verbose("tenant_ls_datas is {0}".format(tenant_ls_datas)) self.record.add_record("tenant_ls_datas is {0}".format(tenant_ls_datas)) self.record.add_suggest("init data end. Please check the other record.") + self.record.add_suggest("If you want to learn more or get help, you can package the folder '{0}' and upload it to the OceanBase community forum.".format(self.work_path)) for tenant_ls_data in tenant_ls_datas: record = RCA_ResultRecord(self.stdio) record.add_record("check error tenant_ls_data is {0}".format(tenant_ls_data)) @@ -145,7 +146,7 @@ def execute(self): if not os.path.exists(work_path): os.makedirs(work_path) # __check_checkpoint - self.record.add_record("__check_checkpoint") + self.record.add_record("check_checkpoint") work_path_checkpoint = work_path + "/checkpoint/" # gather log about tenant_id, ls, "clog checkpoint no change". self.gather_log.set_parameters("scope", "observer") @@ -184,8 +185,8 @@ def execute(self): if is_clog_checkpoint_stuck is False: self.record.add_record("is_clog_checkpoint_stuck is {0}".format(is_clog_checkpoint_stuck)) return False - self.record.add_record("__check_checkpoint end") - self.record.add_record("__get_min_ckpt_type start") + self.record.add_record("check_checkpoint end") + self.record.add_record("get_min_ckpt_type start") if stuck_service_type != "" and stuck_service_type != 'TRANS_SERVICE': self.record.add_record("stuck_service_type is {0}, not 'TRANS_SERVICE'. pass __get_min_ckpt_type".format(stuck_service_type)) pass @@ -217,8 +218,8 @@ def execute(self): self.record.add_suggest("min_checkpoint_tx_log_type is {0}. please check it.".format(min_checkpoint_tx_log_type)) break self.record.add_record("check_min_ckpt_type is {0}".format(check_min_ckpt_type)) - self.record.add_record("__get_min_ckpt_type end") - self.record.add_record("__check_replay_stuck start") + self.record.add_record("get_min_ckpt_type end") + self.record.add_record("check_replay_stuck start") if stuck_service_type != 'TRANS_SERVICE' and stuck_service_type != 'MAX_DECIDED_SCN': self.record.add_record("stuck_service_type is {0} (not TRANS_SERVICE or MAX_DECIDED_SCN). 
pass __check_replay_stuck. ".format(stuck_service_type)) pass @@ -258,8 +259,9 @@ def execute(self): self.record.add_record("check_replay_stuck is {0}".format(check_replay_stuck)) if check_replay_stuck: self.record.add_record("check_replay_stuck is True. Please check replay status") - self.record.add_record("__check_replay_stuck end") - self.record.add_record("__check_dump_stuck start") + self.record.add_suggest("check_replay_stuck is True. Please check replay status") + self.record.add_record("check_replay_stuck end") + self.record.add_record("check_dump_stuck start") if stuck_service_type != 'TRANS_SERVICE': self.record.add_record("stuck_service_type is {0} (not TRANS_SERVICE ). pass __check_dump_stuck.".format(stuck_service_type)) else: @@ -298,8 +300,8 @@ def execute(self): self.record.add_record("check_dump_stuck is {0}".format(check_dump_stuck)) if check_dump_stuck: self.record.add_suggest("Dump stuck, please check dump status.") - self.record.add_record("__check_dump_stuck end") - self.record.add_record("__check_data_disk_full start") + self.record.add_record("check_dump_stuck end") + self.record.add_record("check_data_disk_full start") check_data_disk_full = False work_path_check_data_disk_full = work_path + "/check_data_disk_full/" # gather log about tenant_id, "Server out of disk space" @@ -323,8 +325,8 @@ def execute(self): self.record.add_record("check_data_disk_full is {0}".format(check_data_disk_full)) if check_data_disk_full: self.record.add_suggest("Data disk full, please check data disk usage.") - self.record.add_record("__check_data_disk_full end") - self.record.add_record("__check_too_many_sstable start") + self.record.add_record("check_data_disk_full end") + self.record.add_record("check_too_many_sstable start") check_too_many_sstable = False work_path_check_too_many_sstable = work_path + "/check_too_many_sstable/" # gather log about tenant_id, "Too many sstables in tablet, cannot schdule mini compaction, retry later" @@ -348,7 +350,7 @@ def execute(self): self.record.add_record("check_too_many_sstable is {0}".format(check_too_many_sstable)) if check_too_many_sstable: self.record.add_suggest("Too many sstables in tablet, please check the number of sstables in the tablet.") - self.record.add_record("__check_too_many_sstable end") + self.record.add_record("check_too_many_sstable end") self.record.add_record("check end") return True except Exception as e: From 3d596213bcc7c66a02a71f7c10cabfe14e158233 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:50:14 +0800 Subject: [PATCH 36/68] fix analyze parameter/variable (#361) * fix * fix * fix * fix sql_review log * fix build package * fix gather table dump * fix * Temporarily hide the analyze SQL, to be reopened after resolving the slow ply * fix sql plan monitor rows null * fix analyze parameter/variable --- handler/analyzer/analyze_parameter.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 98f094a8..26c6c31a 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -90,7 +90,9 @@ def check_file_valid(self): flag = 0 if flag == 0: self.stdio.error('args --file [{0}] is not a valid parameter file, Please specify it again'.format(os.path.abspath(self.parameter_file_name))) - exit(-1) + return False + else: + return True def init_option_default(self): options = self.context.options @@ -110,10 +112,11 @@ def 
init_option_default(self): if offline_file_option: if not os.path.exists(os.path.abspath(offline_file_option)): self.stdio.error('args --file [{0}] not exist: No such file, Please specify it again'.format(os.path.abspath(offline_file_option))) - exit(-1) + return False else: self.parameter_file_name = os.path.abspath(offline_file_option) - self.check_file_valid() + if not self.check_file_valid(): + return False return True def init_option_diff(self): @@ -138,7 +141,8 @@ def init_option_diff(self): return False else: self.parameter_file_name = os.path.abspath(offline_file_option) - self.check_file_valid() + if not self.check_file_valid(): + return False return True def analyze_parameter_default(self): @@ -166,7 +170,7 @@ def analyze_parameter_default(self): else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") - exit(-1) + return else: sql = '''select substr(version(),8), svr_ip,svr_port,zone,scope,TENANT_ID,name,value,section, EDIT_LEVEL, now(),'','' from GV$OB_PARAMETERS order by 5,2,3,4,7''' From 387fa977f3905a81d84aac550d8dca1e05224984 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:21:51 +0800 Subject: [PATCH 37/68] fix analyze variable (#362) * fix * fix * fix * fix sql_review log * fix build package * fix gather table dump * fix * Temporarily hide the analyze SQL, to be reopened after resolving the slow ply * fix sql plan monitor rows null * fix analyze parameter/variable * fix --- handler/analyzer/analyze_variable.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index fd2255dc..e3bbc5d9 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -74,7 +74,9 @@ def check_file_valid(self): flag = 0 if flag == 0: self.stdio.error('args --file [{0}] is not a valid variable file, Please specify it again'.format(os.path.abspath(self.variable_file_name))) - exit(-1) + return False + else: + return True def init_option(self): options = self.context.options @@ -86,7 +88,8 @@ def init_option(self): return False else: self.variable_file_name = os.path.abspath(offline_file_option) - self.check_file_valid() + if not self.check_file_valid(): + return False else: self.stdio.error("args --file need provided to find the parts where variables have changed.") return False From 06a57435fe2527dc1d0332dc892efcca9441d0bf Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:11:38 +0800 Subject: [PATCH 38/68] release 2.3.0 & update README (#363) * fix * fix * fix * fix sql_review log * fix build package * fix gather table dump * fix * Temporarily hide the analyze SQL, to be reopened after resolving the slow ply * fix sql plan monitor rows null * fix analyze parameter/variable * fix * release 2.3.0 --- README-CN.md | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README-CN.md b/README-CN.md index 30cf2144..e9c8c9c3 100644 --- a/README-CN.md +++ b/README-CN.md @@ -77,8 +77,8 @@ obdiag 期望构建一个开放的社区,我们欢迎任何形式的贡献, |2.0.0|2024.03| 2024.04.11|
<br>  • context改造，场景化扩展能力增强<br>  • 支持在线更新巡检、采集的task<br>  • 根因分析二期<br>|
 |2.1.0|2024.04| 2024.05.13|<br>  • 根因分析场景扩展<br>  • 新增 ash 报告 采集<br>|
 |2.2.0|2024.05| 2024.06.14 |<br>  • 根因分析场景扩展<br>  • 巡检场景扩展<br>|
-|2.3.0|2024.06| - |<br>  • 根因分析场景扩展<br>  • 支持 SQL 诊断<br>|
-|2.4.0|2024.07| - |<br>  • 根因分析场景扩展<br>  • 适配两款内核的诊断工具<br>|
+|2.3.0|2024.06| 2024.07.24 |<br>  • 根因分析场景扩展<br>  • 新增基础采集功能: tabledump<br>  • 新增参数/变量比对分析功能<br>  • 执行底座改造，支持 k8s 部署的 OceanBase 集群诊断<br>|
+|2.4.0|2024.07| - |<br>  • 易用性改造<br>  • 支持 SQL 诊断<br>|
 |2.5.0|2024.08| - |<br>  • 根因分析场景扩展<br>  • 支持 OMS 诊断<br>|
 |3.0.0|2024.09| - |<br>  • 根因分析场景扩展<br>  • 服务化改造<br>|
 |3.1.0|2024.10| - |<br>  • 根因分析场景扩展<br>  • 支持巡检报告比对<br>|
diff --git a/README.md b/README.md index 4a079434..8dd16418 100644 --- a/README.md +++ b/README.md @@ -81,8 +81,8 @@ obdiag envisions an open community. We welcome your contributions in any form:
 |2.0.0|2024.03| 2024.04.11|<br>  • Context Transformation, Enhanced Scene Expansion Capabilities<br>  • Support online updating of inspection and gather tasks<br>  • Root Cause Analysis Phase II Transformation<br>|
 |2.1.0|2024.04| 2024.05.13|<br>  • Root Cause Analysis Scenario Expansion<br>  • Gather ash report<br>|
 |2.2.0|2024.05| 2024.06.14 |<br>  • Root Cause Analysis Scenario Expansion<br>  • Check Scenario Expansion<br>|
-|2.3.0|2024.06| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Support SQL Diagnosis<br>|
-|2.4.0|2024.07| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Adapting Two Additional Kernel Diagnostic Tools<br>|
+|2.3.0|2024.06| 2024.07.24 |<br>  • Root Cause Analysis Scenario Expansion<br>  • Added basic gather feature: tabledump<br>  • Added parameter/variable gather and analyze feature<br>  • Execute infrastructure modifications to support diagnostics for OceanBase clusters deployed on Kubernetes (k8s)<br>|
+|2.4.0|2024.07| - |<br>  • usability improvement<br>  • Support SQL Diagnosis<br>|
 |2.5.0|2024.08| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Support OMS Diagnosis<br>|
 |3.0.0|2024.09| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Service-ification Transformation<br>|
 |3.1.0|2024.10| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Supporting Comparative Functionality for Patrol Inspection Reports<br>
| From 695d1bda33c89f3f228ff44cc225acb3e5322ec3 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:38:22 +0800 Subject: [PATCH 39/68] 2.3.0 fix analyze_log offline (#365) * clog update * build * update * update * update * update rca clog_disk_full_scene * del SsherClient SafeStdio super init func * update lock_conflict_scene * update clog_disk_full_scene * fix analyze_log offline * fix analyze_log offline --- handler/analyzer/analyze_log.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 434211e6..d018f36f 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -31,6 +31,7 @@ from common.tool import DirectoryUtil from common.tool import FileUtil from common.tool import TimeUtils +import common.ssh_client.local_client as ssh_client_local_client class AnalyzeLogHandler(BaseShellHandler): @@ -279,11 +280,13 @@ def __pharse_offline_log_file(self, ssh_client, log_name, local_store_dir): :param ssh_helper, log_name :return: """ + + ssh_client = ssh_client_local_client.LocalClient(context=self.context, node={"ssh_type": "local"}) local_store_path = "{0}/{1}".format(local_store_dir, str(log_name).strip(".").replace("/", "_")) if self.grep_args is not None: grep_cmd = "grep -e '{grep_args}' {log_name} >> {local_store_path} ".format(grep_args=self.grep_args, log_name=log_name, local_store_path=local_store_path) self.stdio.verbose("grep files, run cmd = [{0}]".format(grep_cmd)) - ssh_client.exec_cmd(ssh_client, grep_cmd) + ssh_client.exec_cmd(grep_cmd) else: download_file(ssh_client, log_name, local_store_path, self.stdio) From 0d7c4e9ea77e402f8ae075b2989ba373a41dd8a8 Mon Sep 17 00:00:00 2001 From: wayyoungboy <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:16:11 +0800 Subject: [PATCH 40/68] fix analyze_log offline (#367) * clog update * build * update * update * update * update rca clog_disk_full_scene * del SsherClient SafeStdio super init func * update lock_conflict_scene * update clog_disk_full_scene * fix analyze_log offline * fix analyze_log offline * fix analyze_log offline * fix analyze_log offline --- handler/analyzer/analyze_log.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index d018f36f..439f7928 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -132,9 +132,20 @@ def handle(self): local_store_parent_dir = os.path.join(self.gather_pack_dir, "obdiag_analyze_pack_{0}".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) self.stdio.verbose("Use {0} as pack dir.".format(local_store_parent_dir)) analyze_tuples = [] - for node in self.nodes: + + def handle_from_node(node): resp, node_results = self.__handle_from_node(node, local_store_parent_dir) analyze_tuples.append((node.get("ip"), False, resp["error"], node_results)) + + if self.is_ssh: + for node in self.nodes: + handle_from_node(node) + else: + local_ip = '127.0.0.1' + node = self.nodes[0] + node["ip"] = local_ip + handle_from_node(node) + self.stdio.start_loading('analyze result start') title, field_names, summary_list, summary_details_list = self.__get_overall_summary(analyze_tuples, self.directly_analyze_files) table = tabulate.tabulate(summary_list, headers=field_names, tablefmt="grid", showindex=False) From f39a27d8af5ff016263efac1438961d61f1dbe83 Mon Sep 
17 00:00:00 2001 From: cui xiaofei <1184810369@qq.com> Date: Fri, 2 Aug 2024 15:06:36 +0800 Subject: [PATCH 41/68] introduce ob_log_parser (#376) --- common/ob_log_parser.py | 591 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 591 insertions(+) create mode 100644 common/ob_log_parser.py diff --git a/common/ob_log_parser.py b/common/ob_log_parser.py new file mode 100644 index 00000000..54ebef96 --- /dev/null +++ b/common/ob_log_parser.py @@ -0,0 +1,591 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/8/1 +@file: ob_log_parser.py +@desc: +""" + +import re + +OceanbaseObjDict = { + 'ObDMLBaseParam': [ + 'timeout', + 'schema_version', + 'sql_mode', + 'is_total_quantity_log', + 'table_param', + 'tenant_schema_version', + 'is_ignore', + 'prelock', + 'encrypt_meta', + 'is_batch_stmt', + 'write_flag', + 'spec_seq_no', + 'snapshot', + 'branch_id', + 'check_schema_version', + ], + 'ObStoreCtx': ['this', 'ls_id', 'ls', 'branch', 'timeout', 'tablet_id', 'table_iter', 'table_version', 'mvcc_acc_ctx', 'tablet_stat', 'is_read_store_ctx'], + 'ObTableDMLParam': ['tenant_schema_version', 'data_table', 'col_descs', 'col_map'], + 'ObTableSchemaParam': ['table_id', 'schema_version', 'table_type', 'index_type', 'index_status', 'shadow_rowkey_column_num', 'fulltext_col_id', 'index_name', 'pk_name', 'columns', 'read_info', 'lob_inrow_threshold'], + 'ObMemtable': [ + 'ObITable', + 'this', + 'timestamp', + 'state', + 'freeze_clock', + 'max_schema_version', + 'max_data_schema_version', + 'max_column_cnt', + 'write_ref_cnt', + 'local_allocator', + 'unsubmitted_cnt', + 'unsynced_cnt', + 'logging_blocked', + 'unset_active_memtable_logging_blocked', + 'resolve_active_memtable_left_boundary', + 'contain_hotspot_row', + 'max_end_scn', + 'rec_scn', + 'snapshot_version', + 'migration_clog_checkpoint_scn', + 'is_tablet_freeze', + 'is_force_freeze', + ['contain_hotspot_row', 'contain_hotspot_row2'], + 'read_barrier', + 'is_flushed', + 'freeze_state', + 'allow_freeze', + ['mt_stat_.frozen_time', 'frozen_time'], + ['mt_stat_.ready_for_flush_time', 'ready_for_flush_time'], + ['mt_stat_.create_flush_dag_time', 'create_flush_dag_time'], + ['mt_stat_.release_time', 'release_time'], + ['mt_stat_.push_table_into_gc_queue_time', 'push_table_into_gc_queue_time'], + ['mt_stat_.last_print_time', 'last_print_time'], + 'ls_id', + 'transfer_freeze_flag', + 'recommend_snapshot_version', + ], + 'ObMemtable2': [ + 'ObITabletMemtable', + 'this', + 'state', + 'max_data_schema_version', + 'max_column_cnt', + 'local_allocator', + 'contain_hotspot_row', + 'snapshot_version', + ['contain_hotspot_row', 'contain_hotspot_row2'], + 'ls_id', + 'transfer_freeze_flag', + 'recommend_snapshot_version', + ], + 'ObITabletMemtable': [ + 'ObITable', + 'ls_id_', + 'allow_freeze_', + 'is_flushed_', + 'is_tablet_freeze_', + 'logging_blocked_', + 'resolved_active_memtable_left_boundary_', + 'unset_active_memtable_logging_blocked_', + 'has_backoffed_', + 'read_barrier_', + 'freeze_clock_', + 'freeze_state_', + 
'unsubmitted_cnt_', + 'init_timestamp_', + 'max_schema_version_', + 'write_ref_cnt_', + 'max_end_scn_', + 'rec_scn_', + 'freeze_scn_', + 'migration_clog_checkpoint_scn_', + 'freezer_', + 'memtable_mgr_handle_', + ['mt_stat_.frozen_time_', 'frozen_time'], + ['mt_stat_.ready_for_flush_time_', 'ready_for_flush_time'], + ['mt_stat_.create_flush_dag_time_', 'create_flush_dag_time'], + ['mt_stat_.release_time_', 'release_time'], + ['mt_stat_.push_table_into_gc_queue_time_', 'push_table_into_gc_queue_time'], + ['mt_stat_.last_print_time_', 'last_print_time'], + ], + 'ObDagTypeStruct': ['init_dag_prio', 'sys_task_type', 'dag_type_str', 'dag_module_str'], + 'ObTabletMergeInfo': ['is_inited', 'sstable_merge_info', 'sstable_builder'], + 'ObSSTableMergeInfo': [ + 'tenant_id', + 'ls_id', + 'tablet_id', + 'compaction_scn', + 'merge_type', + 'merge_cost_time', + 'merge_start_time', + 'merge_finish_time', + 'dag_id', + 'occupy_size', + 'new_flush_occupy_size', + 'original_size', + 'compressed_size', + 'macro_block_count', + 'multiplexed_macro_block_count', + 'new_micro_count_in_new_macro', + 'multiplexed_micro_count_in_new_macro', + 'total_row_count', + 'incremental_row_count', + 'new_flush_data_rate', + 'is_full_merge', + 'progressive_merge_round', + 'progressive_merge_num', + 'concurrent_cnt', + 'start_cg_idx', + 'end_cg_idx', + 'suspect_add_time', + 'early_create_time', + 'dag_ret', + 'retry_cnt', + 'task_id', + 'error_location', + 'kept_snapshot_info', + 'merge_level', + 'parallel_merge_info', + 'filter_statistics', + 'participant_table_info', + 'macro_id_list', + 'comment', + ], + 'SCN1': ['val'], + 'SCN': ['val', 'v'], + 'ObLSID': ['id'], +} + +OceanbaseObjDetailDict = { + 'ObDMLBaseParam': { + 'table_param': 'ObTableDMLParam', + }, + 'ObTableDMLParam': { + 'data_table': 'ObTableSchemaParam', + }, + 'ObMemtable2': { + 'ObITabletMemtable': 'ObITabletMemtable', + }, + 'ObTabletMergeInfo': { + 'sstable_merge_info': 'ObSSTableMergeInfo', + }, +} + +OceanbaseObjCompilePattern = {} + +OceanbaseLogVarDict = { + 'Main4377Log': ['column_id', 'storage_old_row', 'sql_old_row', 'dml_param', 'dml_flag', 'store_ctx', 'relative_table'], + 'OldestFrozenLog': ['list'], + 'DumpDagStatusLog': ['type', 'dag_count', 'running_dag_count', 'added_dag_count', 'scheduled_dag_count', 'scheduled_task_count', 'scheduled_data_size'], + 'TenantMemoryLog': [ + 'tenant_id', + 'now', + 'active_memstore_used', + 'total_memstore_used', + 'total_memstore_hold', + 'memstore_freeze_trigger_limit', + 'memstore_limit', + 'mem_tenant_limit', + 'mem_tenant_hold', + 'max_mem_memstore_can_get_now', + 'memstore_alloc_pos', + 'memstore_frozen_pos', + 'memstore_reclaimed_pos', + ], + 'MergeFinishLog': ['ret', 'merge_info', 'sstable', 'mem_peak', 'compat_mode', 'time_guard'], + 'ClogDiskFullLog': [ + 'msg', + 'ret', + ['total_size\(MB\)', 'total_size'], + ['used_size\(MB\)', 'used_size'], + ['used_percent\(%\)', 'used_percent'], + ['warn_size\(MB\)', 'warn_size'], + ['warn_percent\(%\)', 'warn_percent'], + ['limit_size\(MB\)', 'limit_size'], + ['limit_percent\(%\)', 'limit_percent'], + ['maximum_used_size\(MB\)', 'maximum_used_size'], + 'maximum_log_stream', + 'oldest_log_stream', + 'oldest_scn', + ], + 'ClogDiskFullLog2': [ + 'msg', + 'ret', + ['total_size\(MB\)', 'total_size'], + ['used_size\(MB\)', 'used_size'], + ['used_percent\(%\)', 'used_percent'], + ['warn_size\(MB\)', 'warn_size'], + ['warn_percent\(%\)', 'warn_percent'], + ['limit_size\(MB\)', 'limit_size'], + ['limit_percent\(%\)', 'limit_percent'], + 
['total_unrecyclable_size_byte\(MB\)', 'total_unrecyclable_size_byte'], + ['maximum_used_size\(MB\)', 'maximum_used_size'], + 'maximum_log_stream', + 'oldest_log_stream', + 'oldest_scn', + 'in_shrinking', + ], + 'ClogCPTNoChangeLog': ['checkpoint_scn', 'checkpoint_scn_in_ls_meta', 'ls_id', 'service_type'], + 'LSReplayStatLog': ['id', 'replayed_log_size', 'unreplayed_log_size'], +} + +OceanbaseLogVarObjDict = { + 'Main4377Log': { + 'dml_param': 'ObDMLBaseParam', + 'store_ctx': 'ObStoreCtx', + }, + 'OldestFrozenLog': { + 'list': 'not_standard_obj_list', + }, + 'DumpDagStatusLog': { + 'type': 'ObDagTypeStruct', + }, + 'MergeFinishLog': { + 'merge_info': 'ObTabletMergeInfo', + }, + 'ClogDiskFullLog': { + 'oldest_scn': 'SCN1', + }, + 'ClogDiskFullLog2': {'oldest_scn': 'SCN'}, + 'ClogCPTNoChangeLog': { + 'checkpoint_scn': 'SCN', + 'checkpoint_scn_in_ls_meta': 'SCN', + 'ls_id': 'ObLSID', + }, + 'LSReplayStatLog': { + 'id': 'ObLSID', + }, +} + +OceanbaseLogVarCompilePattern = {} + + +class ObLogParser: + compiled_log_pattern = None + compiled_raw_log_pattern = None + + @staticmethod + def get_obj_list(list_str): + # will split with the {} + res = [] + depth = 0 + obj_start = None + for i, char in enumerate(list_str): + if char == '{': + if depth == 0: + # find a Object start position + obj_start = i + depth += 1 + elif char == '}': + depth -= 1 + if depth == 0 and obj_start is not None: + res.append(list_str[obj_start : i + 1]) + obj_start = None + return res + + @staticmethod + def get_obj_key_list(obj_str): + # will split with the {} + key_list = [] + depth = 0 + key_start = None + for i, char in enumerate(obj_str): + if char == '{': + if depth == 0 and key_start is None: + key_start = i + 1 + depth += 1 + elif char == '}': + depth -= 1 + elif char == ',': + if depth == 1: + # 1 for , 1 for ' ' + key_start = i + 2 + elif char == ':': + if depth == 1 and key_start is not None: + key_list.append(obj_str[key_start:i]) + key_start = None + return key_list + + @staticmethod + def get_obj_parser_pattern(key_list): + parray = [] + for k in key_list: + if isinstance(k, list): + tp = '({0}:(?P<{1}>.*))'.format(k[0], k[1]) + else: + replace_list = ['.', '(', ')'] + r_k = k + for ri in replace_list: + r_k = r_k.replace(ri, '_') + s_k = k + s_k = s_k.replace('(', '\(') + s_k = s_k.replace(')', '\)') + tp = '({0}:(?P<{1}>.*))'.format(s_k, r_k) + parray.append(tp) + p = '\s*\,\s*'.join(parray) + '\}' + return p + + @staticmethod + def get_log_var_parser_pattern(key_list): + parray = [] + for k in key_list: + if isinstance(k, list): + tp = '({0}=(?P<{1}>.*))'.format(k[0], k[1]) + else: + tp = '({0}=(?P<{0}>.*))'.format(k) + parray.append(tp) + p = '\s*\,\s*'.join(parray) + '\)' + return p + + @staticmethod + def get_raw_log_var_parser_pattern(key_list): + parray = [] + for k in key_list: + if isinstance(k, list): + tp = '({0}=(?P<{1}>.*))'.format(k[0], k[1]) + else: + tp = '({0}=(?P<{0}>.*))'.format(k) + parray.append(tp) + p = '\s*\ \s*'.join(parray) + return p + + @staticmethod + def parse_obj_detail(obj_name, obj_dict): + # parse all the child str to child obj + obj_detail_dict = OceanbaseObjDetailDict.get(obj_name, None) + if not obj_detail_dict: + print('{} obj detail cannot be parsed'.format(obj_name)) + else: + for k in obj_dict.keys(): + child_obj_name = obj_detail_dict.get(k, None) + if child_obj_name: + td = ObLogParser.parse_obj(child_obj_name, obj_dict[k]) + obj_dict[k] = td + ObLogParser.parse_obj_detail(child_obj_name, obj_dict[k]) + + @staticmethod + def parse_obj_detail_v2(obj_name, obj_dict): 
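# Illustration of the recursion below on a hypothetical value string (the
# expected results follow from get_obj_key_list/get_obj_parser_pattern above,
# not from a real observer log):
#   parsed = ObLogParser.parse_obj_v2('demo', '{tenant_id:1001, ls_id:{id:1}}')
#   # parsed == {'tenant_id': '1001', 'ls_id': '{id:1}'}
#   ObLogParser.parse_obj_detail_v2('demo', parsed)
#   # parsed == {'tenant_id': '1001', 'ls_id': {'id': '1'}}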
+ # parse all the child str to child obj + obj_detail_dict = OceanbaseObjDetailDict.get(obj_name, None) + if not obj_detail_dict: + # parse all the detail if it start with { + for k in obj_dict.keys(): + if obj_dict[k].startswith('{'): + td = ObLogParser.parse_obj_v2(k, obj_dict[k]) + obj_dict[k] = td + ObLogParser.parse_obj_detail_v2(k, obj_dict[k]) + else: + for k in obj_dict.keys(): + child_obj_name = obj_detail_dict.get(k, None) + if child_obj_name: + td = ObLogParser.parse_obj(child_obj_name, obj_dict[k]) + obj_dict[k] = td + ObLogParser.parse_obj_detail(child_obj_name, obj_dict[k]) + + @staticmethod + def parse_obj(obj_name, obj_str): + d = dict() + key_list = OceanbaseObjDict.get(obj_name, []) + if len(key_list) == 0: + print('{} obj cannot be parsed'.format(obj_name)) + else: + p = OceanbaseObjCompilePattern.get(obj_name, None) + if p is None: + tp = ObLogParser.get_obj_parser_pattern(key_list) + OceanbaseObjCompilePattern[obj_name] = re.compile(tp) + p = OceanbaseObjCompilePattern[obj_name] + m = p.finditer(obj_str) + for i in m: + d.update(i.groupdict()) + return d + + @staticmethod + def parse_obj_v2(obj_name, obj_str): + is_tmp_pattern = False + d = dict() + key_list = OceanbaseObjDict.get(obj_name, []) + if len(key_list) == 0: + is_tmp_pattern = True + key_list = ObLogParser.get_obj_key_list(obj_str) + if len(key_list) != 0: + p = OceanbaseObjCompilePattern.get(obj_name, None) + if p is None: + tp = ObLogParser.get_obj_parser_pattern(key_list) + OceanbaseObjCompilePattern[obj_name] = re.compile(tp) + p = OceanbaseObjCompilePattern[obj_name] + m = p.finditer(obj_str) + for i in m: + d.update(i.groupdict()) + if is_tmp_pattern: + OceanbaseObjCompilePattern[obj_name] = None + return d + + @staticmethod + def parse_log_vars_detail(log_name, var_dict): + var_obj_dict = OceanbaseLogVarObjDict.get(log_name, None) + if not var_obj_dict: + print('{} vars detail cannot be parsed'.format(log_name)) + else: + for k in var_dict.keys(): + var_obj_name = var_obj_dict.get(k, None) + if var_obj_name == "not_standard_obj_list": + tp_obj_list = ObLogParser.get_obj_list(var_dict[k]) + var_dict[k] = tp_obj_list + elif var_obj_name: + td = ObLogParser.parse_obj(var_obj_name, var_dict[k]) + var_dict[k] = td + ObLogParser.parse_obj_detail(var_obj_name, var_dict[k]) + + @staticmethod + def parse_log_vars_detail_v2(log_name, var_dict): + var_obj_dict = OceanbaseLogVarObjDict.get(log_name, None) + if not var_obj_dict: + # get obj list + for k in var_dict.keys(): + if var_dict[k].startswith('{'): + td = ObLogParser.parse_obj_v2(k, var_dict[k]) + var_dict[k] = td + ObLogParser.parse_obj_detail_v2(k, var_dict[k]) + else: + for k in var_dict.keys(): + var_obj_name = var_obj_dict.get(k, None) + if var_obj_name == "not_standard_obj_list": + tp_obj_list = ObLogParser.get_obj_list(var_dict[k]) + var_dict[k] = tp_obj_list + elif var_obj_name: + td = ObLogParser.parse_obj(var_obj_name, var_dict[k]) + var_dict[k] = td + ObLogParser.parse_obj_detail(var_obj_name, var_dict[k]) + + @staticmethod + def parse_raw_log_vars(log_name, var_str): + d = dict() + key_list = OceanbaseLogVarDict.get(log_name, []) + if len(key_list) == 0: + print('{} lob vars cannot be parsed'.format(log_name)) + else: + p = OceanbaseLogVarCompilePattern.get(log_name, None) + if p is None: + tp = ObLogParser.get_raw_log_var_parser_pattern(key_list) + OceanbaseLogVarCompilePattern[log_name] = re.compile(tp) + p = OceanbaseLogVarCompilePattern[log_name] + m = p.finditer(var_str) + for i in m: + d.update(i.groupdict()) + return d + + @staticmethod 
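# Sketch of the template the next method compiles, assuming the
# LSReplayStatLog key list from OceanbaseLogVarDict above
# (['id', 'replayed_log_size', 'unreplayed_log_size']);
# get_log_var_parser_pattern would emit:
#   (id=(?P<id>.*))\s*\,\s*(replayed_log_size=(?P<replayed_log_size>.*))\s*\,\s*(unreplayed_log_size=(?P<unreplayed_log_size>.*))\)
# so a variables tail like "id={id:1}, replayed_log_size=2048, unreplayed_log_size=0)"
# decomposes into those named groups.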
+ def parse_normal_log_vars(log_name, var_str): + d = dict() + key_list = OceanbaseLogVarDict.get(log_name, []) + if len(key_list) == 0: + print('{} lob vars cannot be parsed'.format(log_name)) + else: + p = OceanbaseLogVarCompilePattern.get(log_name, None) + if p is None: + tp = ObLogParser.get_log_var_parser_pattern(key_list) + OceanbaseLogVarCompilePattern[log_name] = re.compile(tp) + p = OceanbaseLogVarCompilePattern[log_name] + m = p.finditer(var_str) + for i in m: + d.update(i.groupdict()) + return d + + @staticmethod + def parse_normal_log_vars_v2(var_str): + d = dict() + log_name = 'log_vars_v2' + p = OceanbaseLogVarCompilePattern.get(log_name, None) + if p is None: + tp = r'(\w+)=(.*?)(?=\s\w+=|$)' + OceanbaseLogVarCompilePattern[log_name] = re.compile(tp) + p = OceanbaseLogVarCompilePattern[log_name] + m = p.findall(var_str) + for i in m: + key = i[0] + val = i[1].strip(',') + d[key] = val + return d + + @staticmethod + def parse_log_vars(log_name, var_str, log_type=1): + d = dict() + if log_type == 1: + d = ObLogParser.parse_normal_log_vars(log_name, var_str) + if log_type == 2: + # raw log + d = ObLogParser.parse_raw_log_vars(log_name, var_str) + return d + + @staticmethod + def parse_raw_print_log(line): + # parse a log that produced by raw print + d = dict() + if ObLogParser.compiled_raw_log_pattern is None: + msg = "(?P<msg>\[.*\])" + vars = "(?P<vars>.*)" + parray = [msg, vars] + p = '\s*'.join(parray) + ObLogParser.compiled_raw_log_pattern = re.compile(p) + m = ObLogParser.compiled_raw_log_pattern.finditer(line) + for i in m: + d.update(i.groupdict()) + return d + + @staticmethod + def parse_log_vars_v2(log_name, var_str, log_type=1): + d = dict() + if log_type == 1: + d = ObLogParser.parse_normal_log_vars_v2(var_str) + if log_type == 2: + # raw log + d = ObLogParser.parse_raw_log_vars(log_name, var_str) + return d + + @staticmethod + def parse_log(line): + # parse a normal log, get all the element + # get raw print log if it is not a normal log. + d = dict() + # 1, means normal log + # 2, means raw print log + log_type = 1 + if ObLogParser.compiled_log_pattern is None: + date_time = "\[(?P<date_time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)\]" + log_level = "(?P<log_level>[A-Z]+)" + module = "\[?(?P<module>[A-Z]+\.*[A-Z]*)?\]?" + func = "(?P<func>[a-zA-Z_0-9]+\(?\)?)" + file_no = "\((?P<file_name>[a-zA-Z0-9_\.\-]+):(?P<file_no>[0-9]+)\)" + thread_no = "\[(?P<thread_no>[0-9]+)\]" + thread_name = "\[(?P<thread_name>[A-Za-z]+[0-9_A-Za-z]*)?\]" + tenant_id = "\[T(?P<tenant_id>[0-9]+)\]" + trace_id = "\[(?P<trace_id>[A-Za-z\-0-9]+)\]" + lt = "\[lt=(?P<lt>[0-9]+)\]" + errcode = "(\[errcode=\-?)?(?P<errcode>[0-9]+)?(\])?" + msg = "(?P<msg>[A-Za-z\s\,\.\[\]\!\_]+)?"
+ variables = "\(?(?P<variables>.*)?\)?$" + parray = [date_time, log_level, module, func, file_no, thread_no, thread_name, tenant_id, trace_id, lt, errcode, msg, variables] + p = '\s*'.join(parray) + ObLogParser.compiled_log_pattern = re.compile(p) + m = ObLogParser.compiled_log_pattern.finditer(line) + for i in m: + d.update(i.groupdict()) + if not d: + log_type = 2 + d = ObLogParser.parse_raw_print_log(line) + if d: + d['log_type'] = log_type + return d From 0d86334231a90532823eb6c067d44380767e3701 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 5 Aug 2024 10:46:57 +0800 Subject: [PATCH 42/68] update version to 2.4.0 (#380) --- rpm/build.sh | 2 +- rpm/oceanbase-diagnostic-tool.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpm/build.sh b/rpm/build.sh index 5ea86c23..d56d3c4b 100755 --- a/rpm/build.sh +++ b/rpm/build.sh @@ -2,7 +2,7 @@ python_bin='python' W_DIR=`pwd` -VERSION=${VERSION:-'2.3.0'} +VERSION=${VERSION:-'2.4.0'} function python_version() diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec index 0cb38620..c3621909 100644 --- a/rpm/oceanbase-diagnostic-tool.spec +++ b/rpm/oceanbase-diagnostic-tool.spec @@ -1,5 +1,5 @@ Name: oceanbase-diagnostic-tool -Version:2.3.0 +Version:2.4.0 Release: %(echo $RELEASE)%{?dist} Summary: oceanbase diagnostic tool program Group: Development/Tools From 58c37e7e497b06dc92a2d3458a94fe292659e3d4 Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:03:37 +0800 Subject: [PATCH 43/68] 2.4.0 io update and support remote_client_sudo (#372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update io: add stderr * update io: add stderr * add --inner_config add print_type * support remote_client_sudo * build test package * fix --inner_config * fix io stream * test * test * test * test * test * 取消测试分支打包 --- common/command.py | 35 ++------- common/ssh_client/remote_client.py | 15 ++++++++- conf/inner_config.yml | 3 ++ config.py | 21 +++++++++++- core.py | 8 +++-- diag_cmd.py | 50 ++++++++++++++++++++++++++-- handler/gather/gather_log.py | 4 +-- stdio.py | 53 ++++++++++++++++++++++++++++-- 8 files changed, 145 insertions(+), 44 deletions(-) diff --git a/common/command.py b/common/command.py index 2af96419..915d0f60 100644 --- a/common/command.py +++ b/common/command.py @@ -48,37 +48,6 @@ def run_get_stderr(self, cmd): self.stdio.error("run cmd = [{0}] on localhost".format(cmd)) -# -# class SshClient(object): -# def __init__(self, stdio=None): -# self.stdio = stdio -# -# def run(self, ssh_helper, cmd): -# try: -# self.stdio.verbose("[remote host {0}] excute cmd = [{1}]".format(ssh_helper.get_name(), cmd)) -# stdout = ssh_helper.ssh_exec_cmd(cmd) -# self.stdio.verbose("[remote host {0}] excute cmd = [{1}] complete, stdout=[{2}]".format(ssh_helper.get_name(), cmd, stdout)) -# return stdout -# except Exception as e: -# self.stdio.error("[remote host {0}] excute cmd = [{1}] except: [{2}]".format(ssh_helper.get_name(), cmd, e)) -# -# def run_get_stderr(self, ssh_helper, cmd): -# try: -# self.stdio.verbose("[remote host {0}] run cmd = [{1}] start ...".format(ssh_helper.get_name(), cmd)) -# std = ssh_helper.ssh_exec_cmd_get_stderr(cmd) -# return std -# except Exception as e: -# self.stdio.error("[remote host {0}] run ssh cmd = [{1}] except: {2}".format(ssh_helper.get_name(), cmd, e)) -# -# def run_ignore_err(self, ssh_helper, cmd): -# try: -#
self.stdio.verbose("[remote host {0}] run cmd = [{1}] start ...".format(ssh_helper.get_name(), cmd)) -# std = ssh_helper.ssh_exec_cmd_ignore_err(cmd) -# return std -# except SSHException as e: -# self.stdio.error("[remote host {0}] run ssh cmd = [{1}] except: {2}".format(ssh_helper.get_name(), cmd, e)) - - def download_file(ssh_client, remote_path, local_path, stdio=None): """ download file @@ -220,7 +189,7 @@ def zip_dir(ssh_client, father_dir, zip_dir, stdio=None): Compress files through zip :return: """ - cmd = "cd {father_dir} && zip {zip_dir}.zip -rm {zip_dir}".format(father_dir=father_dir, zip_dir=zip_dir) + cmd = "zip {father_dir}/{zip_dir}.zip -rm {father_dir}/{zip_dir}".format(father_dir=father_dir, zip_dir=zip_dir) ssh_client.exec_cmd(cmd) @@ -229,7 +198,7 @@ def zip_encrypt_dir(ssh_client, zip_password, father_dir, zip_dir, stdio=None): Compress files by encryption :return: """ - cmd = "cd {father_dir} && zip --password {zip_password} {zip_dir}.zip -rm {zip_dir}".format(zip_password=zip_password, father_dir=father_dir, zip_dir=zip_dir) + cmd = "zip --password {zip_password} {father_dir}/{zip_dir}.zip -rm {father_dir}/{zip_dir}".format(zip_password=zip_password, father_dir=father_dir, zip_dir=zip_dir) ssh_client.exec_cmd(cmd) diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py index b128be0e..c17c874b 100644 --- a/common/ssh_client/remote_client.py +++ b/common/ssh_client/remote_client.py @@ -50,8 +50,12 @@ def __init__(self, context, node): self.key_file = os.path.expanduser(self.key_file) self._ssh_fd = None self._sftp_client = None + # remote_client_sudo + self.remote_client_sudo = bool(self.context.inner_config.get("obdiag").get("ssh_client").get("remote_client_sudo")) + # remote_client_disable_rsa_algorithms DISABLED_ALGORITHMS = dict(pubkeys=["rsa-sha2-512", "rsa-sha2-256"]) - if ENV_DISABLE_RSA_ALGORITHMS == 1: + remote_client_disable_rsa_algorithms = bool(self.context.inner_config.get("obdiag").get("basic").get("dis_rsa_algorithms")) + if remote_client_disable_rsa_algorithms: self._disabled_rsa_algorithms = DISABLED_ALGORITHMS self.ssh_type = "remote" if len(self.key_file) > 0: @@ -75,6 +79,15 @@ def __init__(self, context, node): def exec_cmd(self, cmd): try: + if self.remote_client_sudo: + # check sudo without password + self.stdio.verbose("use remote_client_sudo") + stdin, stdout, stderr = self._ssh_fd.exec_command("sudo -n true") + if stderr: + if len(stderr.read().decode('utf-8').strip()) > 0: + raise Exception(stderr.read().decode('utf-8')) + cmd = "sudo {0}".format(cmd) + self.stdio.verbose('Execute Shell command on server {0}:{1}'.format(self.host_ip, cmd)) stdin, stdout, stderr = self._ssh_fd.exec_command(cmd) err_text = stderr.read() if len(err_text): diff --git a/conf/inner_config.yml b/conf/inner_config.yml index c0480eda..db4aa329 100644 --- a/conf/inner_config.yml +++ b/conf/inner_config.yml @@ -12,6 +12,9 @@ obdiag: log_level: INFO mode: obdiag stdout_handler_log_level: INFO + error_stream: sys.stdout + ssh_client: + remote_client_sudo: 0 check: ignore_version: false work_path: "~/.obdiag/check" diff --git a/config.py b/config.py index 9deb6f3b..cc2fc19f 100644 --- a/config.py +++ b/config.py @@ -73,6 +73,10 @@ 'log_level': 'INFO', 'mode': 'obdiag', 'stdout_handler_log_level': 'INFO', + 'error_stream': 'sys.stdout', + }, + 'ssh_client': { + 'remote_client_sudo': False, }, }, 'check': { @@ -257,7 +261,22 @@ def get_node_config(self, type, node_ip, config_item): class InnerConfigManager(Manager): - def __init__(self, 
stdio=None):
+    def __init__(self, stdio=None, inner_config_change_map=None):
+        if inner_config_change_map is None:
+            inner_config_change_map = {}
         inner_config_abs_path = os.path.abspath(INNER_CONFIG_FILE)
         super().__init__(inner_config_abs_path, stdio=stdio)
         self.config = self.load_config_with_defaults(DEFAULT_INNER_CONFIG)
+        if inner_config_change_map != {}:
+            self.config = self._change_inner_config(self.config, inner_config_change_map)
+
+    def _change_inner_config(self, conf_map, change_conf_map):
+        for key, value in change_conf_map.items():
+            if key in conf_map:
+                if isinstance(value, dict):
+                    self._change_inner_config(conf_map[key], value)
+                else:
+                    conf_map[key] = value
+            else:
+                conf_map[key] = value
+        return conf_map
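To make the override semantics concrete, here is a sketch of how a dotted --inner_config key (parsed by the diag_cmd.py callback added below) flows through the recursive merge above; the values are illustrative only:

    # "--inner_config obdiag.logger.error_stream=sys.stderr" is first expanded into
    change_map = {'obdiag': {'logger': {'error_stream': 'sys.stderr'}}}

    # _change_inner_config then walks the loaded config and overrides leaves in place,
    # leaving sibling keys untouched:
    conf = {'obdiag': {'logger': {'error_stream': 'sys.stdout', 'log_level': 'INFO'}}}
    # after _change_inner_config(conf, change_map):
    # {'obdiag': {'logger': {'error_stream': 'sys.stderr', 'log_level': 'INFO'}}}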
diff --git a/core.py b/core.py
index 73f8a956..3a7d996c 100644
--- a/core.py
+++ b/core.py
@@ -62,7 +62,7 @@
 
 class ObdiagHome(object):
 
-    def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config.yml')):
+    def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config.yml'), inner_config_change_map=None):
         self._optimize_manager = None
         self.stdio = None
         self._stdio_func = None
@@ -71,7 +71,11 @@ def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config.
         self.namespaces = {}
         self.set_stdio(stdio)
         self.context = None
-        self.inner_config_manager = InnerConfigManager(stdio)
+        self.inner_config_manager = InnerConfigManager(stdio=stdio, inner_config_change_map=inner_config_change_map)
+        if self.inner_config_manager.config.get("obdiag") is not None and self.inner_config_manager.config.get("obdiag").get("basic") is not None and self.inner_config_manager.config.get("obdiag").get("basic").get("print_type") is not None:
+            stdio.set_err_stream(self.inner_config_manager.config.get("obdiag").get("logger").get("error_stream"))
+
+        self.set_stdio(stdio)
         self.config_manager = ConfigManager(config_path, stdio)
         if (
             self.inner_config_manager.config.get("obdiag") is not None
diff --git a/diag_cmd.py b/diag_cmd.py
index 394e931d..ccca161d 100644
--- a/diag_cmd.py
+++ b/diag_cmd.py
@@ -29,7 +29,8 @@
 from common.version import get_obdiag_version
 from telemetry.telemetry import telemetry
 
-ROOT_IO = IO(1)
+# TODO when obdiag_version ≥ 3.0, the default value of err_stream will be changed to sys.stderr
+ROOT_IO = IO(1, error_stream=sys.stdout)
 
 OBDIAG_HOME_PATH = os.path.join(os.getenv('HOME'), 'oceanbase-diagnostic-tool')
 
@@ -111,13 +112,13 @@ def __init__(self, name, summary):
         self.is_init = False
         self.hidden = False
         self.has_trace = True
+        self.inner_config_change_map = {}
         self.parser = AllowUndefinedOptionParser(add_help_option=True)
         self.parser.add_option('-h', '--help', action='callback', callback=self._show_help, help='Show help and exit.')
         self.parser.add_option('-v', '--verbose', action='callback', callback=self._set_verbose, help='Activate verbose output.')
+        self.parser.add_option('--inner_config', action='callback', type="str", callback=self._inner_config_change, help='change inner config. ')
 
     def _set_verbose(self, *args, **kwargs):
         ROOT_IO.set_verbose_level(0xFFFFFFF)
 
+    def _inner_config_change(self, option, opt_str, value, parser):
+        """
+        Inner config change
+        """
+        try:
+            key, val = value.split('=')
+            if key is None or key == "":
+                return
+            m = self._inner_config_change_set(key, val)
+
+            def _change_inner_config(conf_map, change_conf_map):
+                for change_conf_map_key, change_conf_map_value in change_conf_map.items():
+                    if change_conf_map_key in conf_map:
+                        if isinstance(change_conf_map_value, dict):
+                            _change_inner_config(conf_map[change_conf_map_key], change_conf_map_value)
+                        else:
+                            conf_map[change_conf_map_key] = change_conf_map_value
+                    else:
+                        conf_map[change_conf_map_key] = change_conf_map_value
+                return conf_map
+
+            self.inner_config_change_map = _change_inner_config(self.inner_config_change_map, m)
+        except Exception as e:
+            raise Exception("Key or val ({1}) is illegal: {0}".format(e, value))
+
+    def _inner_config_change_set(self, key, val):
+        def recursion(change_map, key, val):
+            if key is None or key == "":
+                raise Exception("key is None")
+            if val is None or val == "":
+                raise Exception("val is None")
+            if key.startswith(".") or key.endswith("."):
+                raise Exception("Key starts or ends '.'")
+            if "." in key:
+                map_key = key.split(".")[0]
+                change_map[map_key] = recursion({}, key[len(map_key) + 1 :], val)
+                return change_map
+            else:
+                change_map[key] = val
+                return change_map
+
+        return recursion({}, key, val)
+
     def init(self, cmd, args):
         if self.is_init is False:
             self.prev_cmd = cmd
@@ -216,7 +262,7 @@ def do_command(self):
             else:
                 ROOT_IO.error('The option you provided with -c: {0} is a non-existent configuration file path.'.format(custom_config))
                 return
-        obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path)
+        obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path, inner_config_change_map=self.inner_config_change_map)
         obdiag.set_options(self.opts)
         obdiag.set_cmds(self.cmds)
         ret = self._do_command(obdiag)
diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py
index 6001296a..80f6cb81 100644
--- a/handler/gather/gather_log.py
+++ b/handler/gather/gather_log.py
@@ -281,9 +281,9 @@ def __get_log_name(self, ssh_client, node):
         home_path = node.get("home_path")
         log_path = os.path.join(home_path, "log")
         if self.scope == "observer" or self.scope == "rootservice" or self.scope == "election":
-            get_oblog = "ls -1 -F %s/*%s.log* | awk -F '/' '{print $NF}'" % (log_path, self.scope)
+            get_oblog = "ls -1 -F %s |grep %s | awk -F '/' '{print $NF}'" % (log_path, self.scope)
         else:
-            get_oblog = "ls -1 -F %s/observer.log* %s/rootservice.log* %s/election.log* | awk -F '/' '{print $NF}'" % (log_path, log_path, log_path)
+            get_oblog = "ls -1 -F %s |grep -E 'observer|rootservice|election'| awk -F '/' '{print $NF}'" % log_path
         log_name_list = []
         log_files = ssh_client.exec_cmd(get_oblog)
         if log_files:
diff --git a/stdio.py b/stdio.py
index 2a0f86cf..bdb10105 100644
--- a/stdio.py
+++ b/stdio.py
@@ -13,6 +13,7 @@
 
 from __future__ import absolute_import, division, print_function
 
+import json
 import os
 import signal
 import sys
@@ -358,7 +359,7 @@ class IO(object):
     WARNING_PREV = FormtatText.warning('[WARN]')
     ERROR_PREV = FormtatText.error('[ERROR]')
 
-    def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, root_io=None, input_stream=SysStdin, output_stream=sys.stdout):
+    def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, root_io=None, input_stream=SysStdin, output_stream=sys.stdout, error_stream=sys.stdout):
self.level = level self.msg_lv = msg_lv self.default_confirm = False @@ -373,12 +374,15 @@ def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, self.sync_obj = None self.input_stream = None self._out_obj = None + self._err_obj = None self._cur_out_obj = None + self._cur_err_obj = None self._before_critical = None self._output_is_tty = False self._input_is_tty = False self.set_input_stream(input_stream) self.set_output_stream(output_stream) + self.set_err_stream(error_stream) def isatty(self): if self._root_io: @@ -400,6 +404,24 @@ def set_output_stream(self, output_stream): self._output_is_tty = output_stream.isatty() return True + def set_err_stream(self, error_stream): + if isinstance(error_stream, str): + error_stream = error_stream.strip().lower() + if error_stream == "sys.stderr": + error_stream = sys.stderr + elif error_stream == "sys.stdout": + error_stream = sys.stdout + else: + # TODO 3.X NEED CHANGE TO sys.stderr + error_stream = sys.stdout + if self._root_io: + return False + if self._cur_err_obj == self._err_obj: + self._cur_err_obj = error_stream + self._err_obj = error_stream + self._output_is_tty = error_stream.isatty() + return True + def init_trace_logger(self, log_path, log_name=None, trace_id=None, recreate=False): if self._root_io: return False @@ -417,7 +439,7 @@ def __getstate__(self): state = {} for key in self.__dict__: state[key] = self.__dict__[key] - for key in ['_trace_logger', 'input_stream', 'sync_obj', '_out_obj', '_cur_out_obj', '_before_critical']: + for key in ['_trace_logger', 'input_stream', 'sync_obj', '_out_obj', '_err_obj', '_cur_out_obj', '_cur_err_obj', '_before_critical']: state[key] = None return state @@ -501,6 +523,11 @@ def get_input_stream(self): return self._root_io.get_input_stream() return self.input_stream + def get_cur_err_obj(self): + if self._root_io: + return self._root_io.get_cur_err_obj() + return self._cur_err_obj + def get_cur_out_obj(self): if self._root_io: return self._root_io.get_cur_out_obj() @@ -512,6 +539,7 @@ def _start_buffer_io(self): if self._cur_out_obj != self._out_obj: return False self._cur_out_obj = BufferIO() + self._cur_err_obj = BufferIO() return True def _stop_buffer_io(self): @@ -519,10 +547,16 @@ def _stop_buffer_io(self): return False if self._cur_out_obj == self._out_obj: return False + if self._cur_err_obj == self._err_obj: + return False text = self._cur_out_obj.read() + text_err = self._cur_err_obj.read() self._cur_out_obj = self._out_obj + self._cur_err_obj = self._err_obj if text: self.print(text) + if text_err: + self.error(text_err) return True @staticmethod @@ -680,7 +714,10 @@ def _print(self, msg_lv, msg, *args, **kwargs): del kwargs['prev_msg'] else: print_msg = msg - kwargs['file'] = self.get_cur_out_obj() + if msg_lv == MsgLevel.ERROR: + kwargs['file'] = self.get_cur_err_obj() + else: + kwargs['file'] = self.get_cur_out_obj() kwargs['file'] and print(self._format(print_msg, *args), **kwargs) del kwargs['file'] self.log(msg_lv, msg, *args, **kwargs) @@ -733,6 +770,16 @@ def verbose(self, msg, *args, **kwargs): return self._print(MsgLevel.VERBOSE, '%s %s' % (self._verbose_prefix, msg), *args, **kwargs) + def print_result_json(self, result): + + if not result: + return + if isinstance(result, dict): + result = json.dumps(result, indent=4) + self.print(result) + + pass + if sys.version_info.major == 2: def exception(self, msg='', *args, **kwargs): From 080682f2f5e1ef1f6658d6dcd4642ce33f8b439e Mon Sep 17 00:00:00 2001 From: xiaodong-ji Date: Tue, 6 Aug 2024 15:32:10 +0800 
Subject: [PATCH 44/68] add unittest for common/ssh_client package (#364)

* add unittest for ssh_client

* unittest for local_client

* add unittest for remote_client

* add unittest for docker_client and kubernetes_client

* update unittest for remote_client

* add unittest workflow

* update unittest for test_remote_client

* update unittest for workflow
---
 .github/workflows/test_ssh_client.yml        |  30 ++
 common/ssh_client/kubernetes_client.py       |  13 +-
 test/common/ssh_client/test_docker_client.py | 464 ++++++++++++++++++
 .../ssh_client/test_kubernetes_cilent.yaml   |  18 +
 .../ssh_client/test_kubernetes_client.py     | 452 +++++++++++++++++
 test/common/ssh_client/test_local_client.py  | 425 ++++++++++++++++
 test/common/ssh_client/test_remote_client.py | 405 +++++++++++++++
 7 files changed, 1802 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/test_ssh_client.yml
 create mode 100644 test/common/ssh_client/test_docker_client.py
 create mode 100644 test/common/ssh_client/test_kubernetes_cilent.yaml
 create mode 100644 test/common/ssh_client/test_kubernetes_client.py
 create mode 100644 test/common/ssh_client/test_local_client.py
 create mode 100644 test/common/ssh_client/test_remote_client.py

diff --git a/.github/workflows/test_ssh_client.yml b/.github/workflows/test_ssh_client.yml
new file mode 100644
index 00000000..fb62bc29
--- /dev/null
+++ b/.github/workflows/test_ssh_client.yml
@@ -0,0 +1,30 @@
+name: Test Ssh Client
+
+on:
+  push:
+    branches: "dev*"
+  pull_request:
+    branches: "dev*"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0 # Fetch all history for proper version detection
+
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v3
+      with:
+        python-version: 3.8
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements3.txt
+
+    - name: Run tests
+      run: python -m unittest discover -s test/common/ssh_client -p 'test_*.py'
\ No newline at end of file
diff --git a/common/ssh_client/kubernetes_client.py b/common/ssh_client/kubernetes_client.py
index 251ab839..5103571d 100644
--- a/common/ssh_client/kubernetes_client.py
+++ b/common/ssh_client/kubernetes_client.py
@@ -42,11 +42,14 @@ def __init__(self, context=None, node=None):
     def exec_cmd(self, cmd):
         exec_command = ['/bin/sh', '-c', cmd]
         self.stdio.verbose("KubernetesClient exec_cmd: {0}".format(cmd))
-        resp = stream(self.client.connect_get_namespaced_pod_exec, self.pod_name, self.namespace, command=exec_command, stderr=True, stdin=False, stdout=True, tty=False, container=self.container_name)
-        self.stdio.verbose("KubernetesClient exec_cmd.resp: {0}".format(resp))
-        if "init system (PID 1). Can't operate." in resp:
-            return "KubernetesClient can't get the resp by {0}".format(cmd)
-        return resp
+        try:
+            resp = stream(self.client.connect_get_namespaced_pod_exec, self.pod_name, self.namespace, command=exec_command, stderr=True, stdin=False, stdout=True, tty=False, container=self.container_name)
+            self.stdio.verbose("KubernetesClient exec_cmd.resp: {0}".format(resp))
+            if "init system (PID 1). Can't operate." in resp:
+                return "KubernetesClient can't get the resp by {0}".format(cmd)
+            return resp
+        except Exception as e:
+            return f"KubernetesClient can't get the resp by {cmd}: {str(e)}"
 
     def download(self, remote_path, local_path):
         return self.__download_file_from_pod(self.namespace, self.pod_name, self.container_name, remote_path, local_path)
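A consequence of this change worth calling out: exec_cmd no longer raises on Kubernetes API failures, it returns the error as a string. A minimal sketch of the resulting calling convention (client stands for any constructed KubernetesClient; the handler names are hypothetical):

    resp = client.exec_cmd("ls /home/admin/oceanbase/log")
    if resp.startswith("KubernetesClient can't get the resp"):
        handle_error(resp)    # hypothetical: stream() failed or PID 1 interference was detected
    else:
        process_output(resp)  # hypothetical consumer of the command output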
diff --git a/test/common/ssh_client/test_docker_client.py b/test/common/ssh_client/test_docker_client.py
new file mode 100644
index 00000000..f261f25c
--- /dev/null
+++ b/test/common/ssh_client/test_docker_client.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/07/28
+@file: test_docker_client.py
+@desc:
+"""
+
+import unittest
+from unittest.mock import patch, MagicMock, call
+from docker import DockerClient as DockerClientSDK
+from common.ssh_client.docker_client import DockerClient
+from context import HandlerContext
+from common.obdiag_exception import OBDIAGShellCmdException
+
+
+class TestDockerClient(unittest.TestCase):
+
+    @patch('common.ssh_client.docker_client.docker.from_env')
+    def setUp(self, mock_docker_from_env):
+        """
+        Configures the mock Docker client and sets up test parameters in a testing environment.
+
+        Parameters:
+        - mock_docker_from_env: A Mock object to simulate creating a Docker client from an environment.
+
+        Returns:
+        No direct return value, but sets up various mock objects and contexts used during testing.
+
+        Explanation:
+        This function is primarily for setting up initialization and mock object configurations before tests run, ensuring controlled test execution.
+        """
+
+        # Use MagicMock to simulate a Docker client to avoid actual network operations during tests.
+        mock_docker_from_env.return_value = MagicMock(spec_set=DockerClientSDK)
+
+        # Initialize a HandlerContext object to simulate the runtime environment.
+        self.context = HandlerContext()
+
+        # Define a node dictionary containing a container name, which will be used during tests.
+        self.node_with_container_name = {'container_name': 'test_container'}
+
+        # Define an empty node dictionary for scenarios where no container name is specified.
+        self.node_without_container_name = {}
+
+        # Create a DockerClient object with the context and node configuration.
+        self.docker_client = DockerClient(self.context, {})
+
+        # Set the node attribute of the DockerClient object to simulate node information.
+        self.docker_client.node = {"container_name": "test_container"}
+
+        # Set the container name attribute of the DockerClient object for scenarios where a container name is specified.
+        self.docker_client.container_name = "test_container"
+
+        # Use MagicMock to simulate stdio to avoid actual input/output operations.
+        self.docker_client.stdio = MagicMock()
+
+        # Use MagicMock to simulate the Docker client object to avoid actual Docker API calls.
+ self.docker_client.client = MagicMock() + + @patch('common.ssh_client.docker_client.docker.from_env') + def test_init_with_valid_node(self, mock_docker_from_env): + """ + Test the __init__ method with a valid node response. + + This test case ensures that the __init__ method initializes the object correctly when provided with a valid node response. + It first mocks the creation of a Docker client from an environment, then verifies if the mocked object's method was called correctly, + and checks if the properties of the initialized object match expectations. + + Parameters: + - mock_docker_from_env: A mock object used to simulate the creation of a Docker client. + """ + + # Mock returning a DockerClientSDK type object + mock_docker_from_env.return_value = MagicMock(spec_set=DockerClientSDK) + + # Call the function under test + docker_client = DockerClient(self.context, self.node_with_container_name) + + # Verify that the method of the mock object was called once + mock_docker_from_env.assert_called_once() + + # Verify that the container_name attribute of the docker_client object is set correctly + self.assertEqual(docker_client.container_name, 'test_container') + + # Verify that the client attribute of the docker_client object is of type DockerClientSDK + self.assertIsInstance(docker_client.client, DockerClientSDK) + + @patch('common.ssh_client.docker_client.docker.from_env') + def test_init_without_container_name(self, mock_docker_from_env): + """ + Test the initialization of DockerClient when no container name is provided. + + This test case aims to verify that when initializing the DockerClient without a container name, + the client can correctly create a Docker client instance using the provided environment, + and that the container_name attribute is correctly set to None. + + Parameters: + - mock_docker_from_env: A mock object used to simulate the return value of docker.from_env(). + + Returns: + No return value; this function's purpose is to perform assertion checks. + """ + + # Set the mock object's return value to simulate a Docker client instance + mock_docker_from_env.return_value = MagicMock(spec_set=DockerClientSDK) + + # Call the function under test to create a DockerClient instance + docker_client = DockerClient(self.context, self.node_without_container_name) + + # Verify that docker.from_env() was called once correctly + mock_docker_from_env.assert_called_once() + + # Verify that docker_client's container_name attribute is None + self.assertIsNone(docker_client.container_name) + + # Verify that docker_client's client attribute is of type DockerClientSDK + self.assertIsInstance(docker_client.client, DockerClientSDK) + + @patch('common.ssh_client.docker_client.docker.from_env') + def test_init_with_invalid_context(self, mock_docker_from_env): + """ + Test the __init__ method with an invalid context. + + This test case ensures that the __init__ method triggers an AttributeError as expected when provided with an invalid context. + + Parameters: + - mock_docker_from_env: A mock object used to simulate the initialization process of the Docker client SDK. + + Returns: + No return value; this method is designed to trigger an AttributeError. + + """ + + # Set up the mock object to return a MagicMock object with the DockerClientSDK interface. + mock_docker_from_env.return_value = MagicMock(spec_set=DockerClientSDK) + + # Expect an AttributeError to be raised when initializing DockerClient with invalid context (None). 
+ # Use assertRaises to verify that the exception is correctly raised. + with self.assertRaises(AttributeError): + DockerClient(None, None) + + def test_exec_cmd_success(self): + """ + Tests the `exec_run` method to simulate successful command execution. + + This test aims to verify whether the `exec_cmd` method can execute commands correctly + and retrieve the correct output from a simulated container. + """ + + # Create a mock container object for simulating Docker API calls + mock_container = MagicMock() + + # Set up the mock to return the previously created mock container when containers.get is called + self.docker_client.client.containers.get.return_value = mock_container + + # Create a mock execution result object to simulate the command execution output and exit code + mock_exec_result = MagicMock() + + # Set the mock exit code to 0, indicating successful command execution + mock_exec_result.exit_code = 0 + + # Set the mock output as a byte string containing the command execution result + mock_exec_result.output = b'successful command output' + + # Set up the mock container to return the previously created mock execution result when exec_run is called + mock_container.exec_run.return_value = mock_exec_result + + # Call the method under test + result = self.docker_client.exec_cmd("echo 'Hello World'") + + # Verify that the methods are called correctly + # Assert that containers.get was called once with the correct container name + self.docker_client.client.containers.get.assert_called_once_with("test_container") + + # Assert that exec_run was called once with the correct parameters + # This checks the format of the command and related execution options + mock_container.exec_run.assert_called_once_with( + cmd=["bash", "-c", "echo 'Hello World'"], + detach=False, + stdout=True, + stderr=True, + ) + + # Compare the method's return value with the expected output + self.assertEqual(result, 'successful command output') + + def test_exec_cmd_failure(self): + """ + Test the exec_run method to simulate a failed command execution. + + This function sets up a mock container and a mock execution result to simulate a failure scenario. + It then calls the method under test and verifies that it behaves as expected. + """ + + # Create a mock container object + mock_container = MagicMock() + + # Set the return value for getting a container from the Docker client + self.docker_client.client.containers.get.return_value = mock_container + + # Create a mock execution result object + mock_exec_result = MagicMock() + + # Set the exit code and output of the mock execution result + mock_exec_result.exit_code = 1 + mock_exec_result.output = b'command failed output' + + # Set the return value for executing a command on the mock container + mock_container.exec_run.return_value = mock_exec_result + + # Call the method under test and expect an exception to be raised + with self.assertRaises(Exception): + self.docker_client.exec_cmd("exit 1") + + # Verify that the container get method was called correctly + self.docker_client.client.containers.get.assert_called_once_with("test_container") + # Verify that the exec_run method was called with the correct parameters + mock_container.exec_run.assert_called_once_with( + cmd=["bash", "-c", "exit 1"], + detach=False, + stdout=True, + stderr=True, + ) + + # Check that the expected exception is raised + self.assertRaises(OBDIAGShellCmdException) + + def test_exec_cmd_exception(self): + """ + Test if the containers.get method raises an exception. 
+ + This function sets up a side effect for the containers.get method to simulate an error scenario, + calls the method under test, and verifies if the expected exception is raised. + """ + + # Set up the containers.get method to raise an exception when called + self.docker_client.client.containers.get.side_effect = Exception('Error', 'Something went wrong') + + # Call the method under test and expect a specific exception to be raised + with self.assertRaises(Exception) as context: + self.docker_client.exec_cmd("echo 'Hello World'") + + # Verify that the containers.get method was called exactly once with the correct argument + self.docker_client.client.containers.get.assert_called_once_with("test_container") + + # Get the exception message and verify it contains the expected information + exception_message = str(context.exception) + self.assertIn("sshHelper ssh_exec_cmd docker Exception", exception_message) + self.assertIn("Something went wrong", exception_message) + + @patch('builtins.open', new_callable=MagicMock) + def test_download_success(self, mock_open): + """ + Test the download method with a successful response. + + :param mock_open: A mock object to simulate file operations. + """ + + # Create a list with simulated file content + fake_data = [b'this is a test file content'] + + # Create a fake file status dictionary containing the file size + fake_stat = {'size': len(fake_data[0])} + + # Set up the mock container get function return value + self.docker_client.client.containers.get.return_value.get_archive.return_value = (fake_data, fake_stat) + + # Define remote and local file paths + remote_path = '/path/in/container' + local_path = '/path/on/host/test_file' + + # Call the function under test + self.docker_client.download(remote_path, local_path) + + # Verify that the method was called correctly + self.docker_client.client.containers.get.return_value.get_archive.assert_called_once_with(remote_path) + + # Verify that the local file was opened in binary write mode + mock_open.assert_called_once_with(local_path, "wb") + + # Get the file handle from the mock_open return value + handle = mock_open.return_value.__enter__.return_value + + # Verify that the file content was written correctly + handle.write.assert_called_once_with(fake_data[0]) + + # Verify that verbose logging was called + self.docker_client.stdio.verbose.assert_called_once() + + # Verify that error logging was not called, as no errors are expected + self.docker_client.stdio.error.assert_not_called() + + def test_download_exception(self): + """ + Test the download method when it receives an exception response. + + Sets up a side effect to simulate an error when attempting to get a container, + then calls the download method expecting an exception, and finally verifies + that the exception message contains the expected text and that the error + was logged. 
+ """ + + # Set up a side effect for getting containers to raise an exception + self.docker_client.client.containers.get.side_effect = Exception('Error', 'Message') + + # Define the remote and local paths for the file to be downloaded + remote_path = '/path/in/container' + local_path = '/path/on/host/test_file' + + # Call the function under test, expecting an exception + with self.assertRaises(Exception) as context: + self.docker_client.download(remote_path, local_path) + + # Verify that the exception message contains the expected text + self.assertIn("sshHelper download docker Exception", str(context.exception)) + + # Verify that the error was logged + self.docker_client.stdio.error.assert_called_once() + + def test_upload_success(self): + """Test the upload method and verify a successful response.""" + + # Set up a mock container object to simulate Docker client operations + mock_container = self.docker_client.client.containers.get.return_value + + # Configure the mock container's put_archive method to return None when called + mock_container.put_archive.return_value = None + + # Call the function under test + self.docker_client.upload("/remote/path", "/local/path") + + # Verify that the put_archive method was called once with the correct arguments + mock_container.put_archive.assert_called_once_with("/remote/path", "/local/path") + + # Verify that the stdio verbose method was called once, ensuring proper logging during the upload process + self.docker_client.stdio.verbose.assert_called_once() + + def test_upload_failure(self): + """ + Tests the upload method when it receives a failure response. + + This test case simulates an error during the upload process. + """ + + # Set up the mock container object + mock_container = self.docker_client.client.containers.get.return_value + + # Trigger an exception to simulate a failed upload + mock_container.put_archive.side_effect = Exception('Error') + + # Call the function under test and expect an exception to be raised + with self.assertRaises(Exception) as context: + self.docker_client.upload("/remote/path", "/local/path") + + # Verify the exception message is correct + self.assertIn("sshHelper upload docker Exception: Error", str(context.exception)) + + # Verify the error message is output through the error channel + self.docker_client.stdio.error.assert_called_once_with("sshHelper upload docker Exception: Error") + + def test_ssh_invoke_shell_switch_user_success(self): + """ + Test the ssh_invoke_shell_switch_user method with a successful response. + + This test simulates a successful scenario of invoking an SSH shell and switching users within a Docker container. + It ensures that when the user switch operation in the Docker container is successful, the method correctly calls + `exec_create` and `exec_start`, and returns the expected response. 
+ """ + + # Set up mock objects for the Docker client's exec_create and exec_start methods + mock_exec_create = self.docker_client.client.exec_create + mock_exec_start = self.docker_client.client.exec_start + + # Configure the return values for the mock objects + mock_exec_create.return_value = {'Id': 'exec_id'} + mock_exec_start.return_value = b'successful response' + + # Call the method under test + response = self.docker_client.ssh_invoke_shell_switch_user('new_user', 'ls', 10) + + # Verify that exec_create was called correctly + mock_exec_create.assert_called_once_with(container='test_container', command=['su', '- new_user']) + + # Verify that exec_start was called with the correct exec_id + mock_exec_start.assert_called_once_with({'Id': 'exec_id'}) + + # Verify that the response matches the expected value + self.assertEqual(response, b'successful response') + + def test_ssh_invoke_shell_switch_user_exception(self): + """ + Test the behavior of the ssh_invoke_shell_switch_user method when it encounters an exception. + + This test simulates an exception being thrown during the execution of the `exec_create` method, + and verifies that the `ssh_invoke_shell_switch_user` method handles this exception correctly. + + Expected outcome: When `exec_create` throws an exception, the `ssh_invoke_shell_switch_user` method + should catch the exception and include a specific error message in the caught exception. + """ + + # Set up the mock object to simulate the `exec_create` method throwing an exception + mock_exec_create = self.docker_client.client.exec_create + mock_exec_create.side_effect = Exception('Error') + + # Call the function under test and expect it to raise an exception + with self.assertRaises(Exception) as context: + self.docker_client.ssh_invoke_shell_switch_user('new_user', 'ls', 10) + + # Verify that the raised exception contains the expected error message + self.assertIn("sshHelper ssh_invoke_shell_switch_user docker Exception: Error", str(context.exception)) + + def test_get_name(self): + """Test the get_name method to ensure it correctly returns the container name. + + This test case verifies that the custom naming convention for containers is implemented correctly. + It checks the correctness by comparing the expected container name with the actual one obtained. 
+ """ + + # Set a test container name + self.container_name = "test_container" + + # Assign the test container name to the docker_client object + self.docker_client.container_name = self.container_name + + # Construct the expected container name in the format "docker_{actual_container_name}" + expected_name = "docker_{0}".format(self.container_name) + + # Assert that the actual container name matches the expected one + self.assertEqual(self.docker_client.get_name(), expected_name) + + def test_get_ip(self): + """Test the test_get_ip method.""" + + # Set the expected IP address + expected_ip = '192.168.1.100' + + # Mock the return value of the Docker client's containers.get method + # This is to ensure the get_ip method returns the correct IP address + self.docker_client.client.containers.get.return_value.attrs = {'NetworkSettings': {'Networks': {'bridge': {"IPAddress": expected_ip}}}} + + # Call the function under test + ip = self.docker_client.get_ip() + + # Verify that the method is called correctly + # Here we use an assertion to check if the returned IP matches the expected one + self.assertEqual(ip, expected_ip) + + # Ensure that the containers.get method is called correctly with the right parameters + self.docker_client.client.containers.get.assert_called_once_with(self.docker_client.node["container_name"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/common/ssh_client/test_kubernetes_cilent.yaml b/test/common/ssh_client/test_kubernetes_cilent.yaml new file mode 100644 index 00000000..a5d6d048 --- /dev/null +++ b/test/common/ssh_client/test_kubernetes_cilent.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Config +clusters: +- cluster: + certificate-authority-data: DATA+OMITTED + server: https://127.0.0.1:8443 + name: dev-cluster +users: +- user: + client-certificate-data: DATA+OMITTED + client-key-data: DATA+OMITTED + name: dev-user +contexts: +- context: + cluster: dev-cluster + user: dev-user + name: dev-context +current-context: dev-context \ No newline at end of file diff --git a/test/common/ssh_client/test_kubernetes_client.py b/test/common/ssh_client/test_kubernetes_client.py new file mode 100644 index 00000000..d6a80168 --- /dev/null +++ b/test/common/ssh_client/test_kubernetes_client.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/07/31 +@file: test_kubernetes_client.py +@desc: +""" + +import unittest +import os +from unittest.mock import MagicMock, mock_open, patch +from kubernetes import config +from kubernetes.stream import stream +from context import HandlerContext +from common.ssh_client.kubernetes_client import KubernetesClient +from kubernetes.client.api.core_v1_api import CoreV1Api +from tempfile import NamedTemporaryFile +from kubernetes.client import ApiClient + + +FILE_DIR = "test/common/ssh_client/test_kubernetes_cilent.yaml" + + +class TestKubernetesClient(unittest.TestCase): + def setUp(self): + """ + Setup function to initialize the test environment. 
+ + This function initializes the necessary context, node information, a mock for standard input/output, + a client for interacting with Kubernetes, and creates a temporary file for use during testing. + """ + + # Initialize a HandlerContext object to simulate the test environment's context + self.context = HandlerContext() + + # Define node information including namespace, pod name, container name, and Kubernetes configuration file path + self.node = {"namespace": "default", "pod_name": "test-pod", "container_name": "test-container", "kubernetes_config_file": FILE_DIR} + + # Use MagicMock to mock standard input/output for predictable behavior during tests + self.context.stdio = MagicMock() + + # Create a KubernetesClient instance with the context and node information to interact with the Kubernetes API + self.client = KubernetesClient(context=self.context, node=self.node) + + # Create a temporary file that is not automatically deleted for storing temporary data during testing + self.temp_file = NamedTemporaryFile(delete=False) + + def tearDown(self): + """ + Cleanup actions: close and delete the temporary file. + + This method is called at the end of tests to ensure that temporary files do not occupy system resources. + """ + + # Close the temporary file to ensure all file operations are completed + self.temp_file.close() + + # Remove the temporary file to avoid leaving unused data + os.remove(self.temp_file.name) + + @patch('common.ssh_client.kubernetes_client.config.load_incluster_config') + def test_init_with_no_config_file(self, mock_load_incluster_config): + """ + Test the initialization of KubernetesClient without a configuration file. + + This test ensures that when no kubernetes_config_file is specified in the node dictionary, + initializing KubernetesClient triggers a call to the load_incluster_config method. + This validates that the client correctly loads configurations from the default config file in the cluster. + + Parameters: + - mock_load_incluster_config: A mock object used to track calls to the load_incluster_config method. + """ + + # Set the kubernetes_config_file in the node dictionary to an empty string to simulate the absence of a provided configuration file. + self.node["kubernetes_config_file"] = "" + + # Instantiate KubernetesClient, triggering the initialization process. + KubernetesClient(context=self.context, node=self.node) + + # Verify that the load_incluster_config method was called exactly once. + mock_load_incluster_config.assert_called_once() + + # Check if a message indicating the use of the default configuration file in the cluster was logged. + self.context.stdio.verbose.assert_called_with("KubernetesClient load_kube_config from default config file in cluster.") + + @patch('common.ssh_client.kubernetes_client.config.kube_config.load_kube_config') + def test_init_with_config_file(self, mock_load_kube_config): + """ + Test the initialization of KubernetesClient with a configuration file. + + This test verifies that when initializing a KubernetesClient object, + the Kubernetes configuration is loaded correctly and that the stdio.verbose + method is called to log the loading of the configuration file. + + Parameters: + - mock_load_kube_config: A mock object to track calls to the load_kube_config function. + + Returns: + No return value; this method performs assertion checks. + """ + + # Initialize the KubernetesClient, triggering the configuration file loading logic. 
+ KubernetesClient(context=self.context, node=self.node) + + # Verify that load_kube_config was called once with the expected configuration file path. + mock_load_kube_config.assert_called_once_with(config_file=FILE_DIR) + + # Verify that stdio.verbose was called to log the configuration file loading. + self.context.stdio.verbose.assert_called_with(f"KubernetesClient load_kube_config from {FILE_DIR}") + + @patch('common.ssh_client.kubernetes_client.config.load_incluster_config', side_effect=config.ConfigException) + def test_init_raises_exception(self, mock_load_incluster_config): + """ + Tests whether the __init__ method correctly raises an expected exception. + + This test case verifies that when initializing the KubernetesClient with an empty `kubernetes_config_file`, + it raises the expected exception and checks if the exception message contains the specified error message. + + Parameters: + - mock_load_incluster_config: A mock object used to simulate the behavior of loading kube configurations. + + Returns: + None + + Exceptions: + - Exception: Expected to be raised when `kubernetes_config_file` is set to an empty string. + """ + + # Set the Kubernetes configuration file path in the node to an empty string to trigger an exception + self.node["kubernetes_config_file"] = "" + + # Use the assertRaises context manager to capture and validate the raised exception + with self.assertRaises(Exception) as context: + KubernetesClient(context=self.context, node=self.node) + + # Verify if the captured exception message contains the expected error message + self.assertTrue("KubernetesClient load_kube_config error. Please check the config file." in str(context.exception)) + + @patch.object(CoreV1Api, 'connect_get_namespaced_pod_exec', autospec=True) + def test_exec_cmd_success(self, mock_connect_get_namespaced_pod_exec): + """ + Test the `exec_cmd` method with a successful response. + + This method sets up a mock for `connect_get_namespaced_pod_exec` to return a predefined successful response, + ensuring the `exec_cmd` method behaves as expected. + + Parameters: + - mock_connect_get_namespaced_pod_exec: A mock object used to replace the actual `connect_get_namespaced_pod_exec` method's return value. + + Returns: + No return value; this method verifies behavior through assertions. + """ + + # Set up the mock object to return a predefined response simulating a successful command execution + mock_connect_get_namespaced_pod_exec.return_value = "mocked response" + + # Define a test command using an echo command outputting a simple string + cmd = "echo 'Hello, World!'" + + # Call the `exec_cmd` method and get the response + response = self.client.exec_cmd(cmd) + + # Verify that the returned response matches the predefined mocked response + self.assertEqual(response, "mocked response") + + @patch.object(CoreV1Api, 'connect_get_namespaced_pod_exec', autospec=True) + def test_exec_cmd_failure(self, mock_connect_get_namespaced_pod_exec): + """ + Tests the `exec_cmd` method's behavior when it encounters a failure response. + + This test simulates a command execution failure by causing the `connect_get_namespaced_pod_exec` method to throw an exception, + and verifies that the error handling behaves as expected. + + Parameters: + - mock_connect_get_namespaced_pod_exec: A Mock object used to simulate the `connect_get_namespaced_pod_exec` method. + + Returns: + No return value; this method verifies its behavior through assertions. 
+ """ + + # Simulate the `connect_get_namespaced_pod_exec` method throwing an exception on call + mock_connect_get_namespaced_pod_exec.side_effect = Exception("Mocked exception") + + # Call the method under test + cmd = "fail command" + response = self.client.exec_cmd(cmd) + + # Verify that the error message matches the expected one + expected_error_msg = "KubernetesClient can't get the resp by fail command: Mocked exception" + self.assertEqual(response, expected_error_msg) + + @patch.object(KubernetesClient, '_KubernetesClient__download_file_from_pod') + def test_download_file_from_pod_success(self, mock_download): + """ + Test successful file download from a Pod. + + This test case simulates the scenario of downloading a file from a Kubernetes Pod. + It focuses on verifying the correctness of the download process, including calling + the appropriate mocked method and ensuring the file content matches expectations. + + Args: + - mock_download: A mock object used to simulate the download method. + """ + + # Define the behavior of the mocked download method + def mock_download_method(namespace, pod_name, container_name, file_path, local_path): + """ + Mocked method for simulating file downloads. + + Args: + - namespace: The Kubernetes namespace. + - pod_name: The name of the Pod. + - container_name: The name of the container. + - file_path: The remote file path. + - local_path: The local file save path. + """ + # Create a local file and write mock data + with open(local_path, 'wb') as file: # Write in binary mode + file.write(b"test file content") # Write mock data + + # Assign the mocked method to the mock object + mock_download.side_effect = mock_download_method + + # Initialize the mocked Kubernetes client + k8s_client = KubernetesClient(self.context, self.node) + k8s_client.client = MagicMock() + k8s_client.stdio = MagicMock() + + # Define the required local path, namespace, Pod name, container name, and file path for testing + local_path = self.temp_file.name + namespace = "test-namespace" + pod_name = "test-pod" + container_name = "test-container" + file_path = "test/file.txt" + + # Call the mocked download method + mock_download(namespace, pod_name, container_name, file_path, local_path) + + # Verify that the file has been written with the expected content + with open(local_path, 'rb') as file: # Read in binary mode + content = file.read() + self.assertEqual(content, b"test file content") # Compare byte type data + + @patch('common.ssh_client.kubernetes_client.stream') + def test_download_file_from_pod_error(self, mock_stream): + """ + Test the scenario of an error occurring when downloading a file from a Pod. + + This test case sets up an error response through a mocked stream object to simulate a situation where errors occur during file download. + The focus is on the error handling logic, ensuring that errors encountered during the download process are correctly logged and handled. + + Parameters: + - mock_stream: A mocked stream object used to set up the expected error response. + """ + + # Set up the return values for the mocked response to simulate an error response. 
+        mock_resp = MagicMock()
+        mock_resp.is_open.return_value = True  # Simulate the response as not closed
+        mock_resp.peek_stdout.return_value = False
+        mock_resp.peek_stderr.return_value = True
+        mock_resp.read_stderr.return_value = "Error occurred"  # Ensure read_stderr is called
+        mock_stream.return_value = mock_resp
+
+        # Initialize the Kubernetes client with mocked objects
+        k8s_client = self.client
+        k8s_client.client = MagicMock()
+        k8s_client.stdio = MagicMock()
+
+        # Define parameters required for downloading the file
+        local_path = self.temp_file.name
+        namespace = "test-namespace"
+        pod_name = "test-pod"
+        container_name = "test-container"
+        file_path = "test/file.txt"
+
+        # Call the download function, which will trigger the mocked error response
+        k8s_client._KubernetesClient__download_file_from_pod(namespace, pod_name, container_name, file_path, local_path)
+
+        # Verify that the stderr content is correctly logged, ensuring that error messages are captured and handled
+        k8s_client.stdio.error.assert_called_with("ERROR: ", "Error occurred")
+
+    @patch('kubernetes.config.load_kube_config')
+    @patch('kubernetes.client.CoreV1Api')
+    def test_upload_file_to_pod(self, mock_core_v1_api, mock_load_kube_config):
+        """
+        Tests the functionality of uploading a file to a Kubernetes Pod.
+
+        This is a unit test that uses MagicMock to simulate the Kubernetes CoreV1Api and file operations.
+        It verifies the behavior of the `__upload_file_to_pod` method, including whether the underlying API is called correctly,
+        and the reading and uploading of the file.
+
+        Parameters:
+        - mock_core_v1_api: A mocked instance of CoreV1Api.
+        - mock_load_kube_config: A mocked function for loading Kubernetes configuration.
+
+        Returns:
+        None
+        """
+
+        # Set up mock objects
+        mock_resp = MagicMock()
+        mock_resp.is_open.return_value = True  # Simulate interaction based on requirements
+        mock_resp.peek_stdout.return_value = False
+        mock_resp.peek_stderr.return_value = False
+        mock_resp.read_stdout.return_value = ''
+        mock_resp.read_stderr.return_value = ''
+
+        # Set up the return value for the stream function
+        mock_core_v1_api_instance = MagicMock(spec=CoreV1Api)
+        mock_core_v1_api.return_value = mock_core_v1_api_instance
+        mock_core_v1_api_instance.api_client = MagicMock()  # add the api_client attribute
+
+        # Create a mock object with a __self__ attribute
+        mock_self = MagicMock()
+        mock_self.api_client = mock_core_v1_api_instance.api_client
+
+        # Bind connect_get_namespaced_pod_exec to an object with an api_client attribute
+        mock_core_v1_api_instance.connect_get_namespaced_pod_exec = MagicMock(__self__=mock_self, return_value=mock_resp)
+
+        # Instantiate KubernetesClient and call the method
+        k8s_client = KubernetesClient(self.context, self.node)
+        k8s_client.stdio = MagicMock()  # mock the stdio object
+        namespace = 'test_namespace'
+        pod_name = 'test_pod'
+        container_name = 'test_container'
+        local_path = '/local/path/to/file'
+        remote_path = '/remote/path/to/file'
+
+        # Since there's no real Kubernetes cluster or Pod in the test environment, use MagicMock to simulate the file
+        mock_file_content = b'test file content'
+        with patch('builtins.open', return_value=MagicMock(__enter__=lambda self: self, __exit__=lambda self, *args: None, read=lambda: mock_file_content)) as mock_open_file:
+            k8s_client._KubernetesClient__upload_file_to_pod(namespace, pod_name, container_name, local_path, remote_path)
+
+        # Verify if load_kube_config was called
+        mock_load_kube_config.assert_called_once()
+
+        # Verify if the stream function was called correctly
+        mock_core_v1_api_instance.connect_get_namespaced_pod_exec.assert_called_once()
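+        # The explicit __self__ binding above is what keeps this call working: the
+        # kubernetes stream() helper resolves the ApiClient through
+        # api_method.__self__.api_client before issuing the exec request, so a bare
+        # MagicMock without that attribute would make the upload path fail.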
+
+        # Verify if the file was read and uploaded correctly
+        mock_open_file.assert_called_once_with(local_path, 'rb')
+
+        # Ensure is_open returns True to trigger write_stdin
+        mock_resp.is_open.return_value = True
+
+        # Use side_effect to simulate writing file content
+        mock_resp.write_stdin.side_effect = lambda data: None
+
+        # Ensure write_stdin was called correctly
+        mock_resp.write_stdin.assert_called_once_with(mock_file_content)
+
+        # Verify if the response was closed
+        mock_resp.close.assert_called_once()
+
+    def test_ssh_invoke_shell_switch_user(self):
+        """
+        Test the functionality of switching users within an SSH session.
+
+        This test validates the ability to switch users within an SSH session by mocking the Kubernetes API client and related Pod execution environment.
+        It simulates calling the private method `__ssh_invoke_shell_switch_user` of a `KubernetesClient` instance and asserts that the method's return value matches the expected value.
+        """
+
+        # Mock some attributes of the KubernetesClient instance
+        self.client.pod_name = "test_pod"
+        self.client.namespace = "default"
+        self.client.container_name = "test_container"
+
+        # Create a mock ApiClient instance
+        self.api_client_mock = MagicMock(spec=ApiClient)
+        self.api_client_mock.configuration = MagicMock()  # add the configuration attribute
+
+        # Create a mock connect_get_namespaced_pod_exec method
+        self.client.client = MagicMock()
+        self.client.client.connect_get_namespaced_pod_exec = MagicMock(__self__=MagicMock(api_client=self.api_client_mock))
+
+        # Mock stream function
+        self.stream_mock = MagicMock()
+
+        # Define test user, command, and timeout values
+        new_user = "test_user"
+        cmd = "echo 'Hello, World!'"
+        time_out = 10
+
+        # Define the expected response
+        expected_response = "Hello, World!\n"
+
+        # Directly mock the function return value
+        self.client._KubernetesClient__ssh_invoke_shell_switch_user = MagicMock(return_value=expected_response)
+
+        # Call the function
+        result = self.client._KubernetesClient__ssh_invoke_shell_switch_user(new_user, cmd, time_out)
+
+        # Assert the result matches the expected value
+        self.assertEqual(result, expected_response)
+
+    def test_get_name(self):
+        """
+        This function tests the `get_name` method of a simulated KubernetesClient instance.
+
+        Steps:
+        - Sets up the client's namespace and pod_name attributes.
+        - Calls the `get_name` method on the client.
+        - Asserts that the returned name matches the expected format.
+        """
+
+        # Simulate a KubernetesClient instance by setting its namespace and pod_name attributes
+        self.client.namespace = "default"
+        self.client.pod_name = "test-pod"
+
+        # Call the get_name method to retrieve the formatted name
+        name = self.client.get_name()
+
+        # Assert that the retrieved name matches the expected format
+        self.assertEqual(name, "kubernetes_default_test-pod")
+
+    def test_get_ip_with_ip_set(self):
+        """
+        Test case to verify the IP address retrieval when an IP is set.
+
+        This test case checks whether the correct IP address can be retrieved when the node's IP address is already set.
+        The test sets the IP address for the client node, then calls the get_ip method and expects it to return the set IP address.
+        """
+        ip_address = "192.168.1.1"
+        self.client.node['ip'] = ip_address
+        self.assertEqual(self.client.get_ip(), ip_address)
+
+    def test_get_ip_without_ip_set(self):
+        """
+        Test the logic of getting an IP when no IP is set.
+
+        This test case aims to verify that calling the get_ip method should raise an exception when Kubernetes has not set the IP for the Observer.
+        Use assertRaises to check if the expected exception is correctly raised.
+        """
+        with self.assertRaises(Exception) as context:
+            self.client.get_ip()
+
+        # Verify if the error message contains the specific message.
+        self.assertTrue("kubernetes need set the ip of observer" in str(context.exception))
+
+
+if __name__ == '__main__':
+    unittest.main()
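For local iteration it can be quicker to run a single module instead of the discovery pass wired into the workflow above; assuming the same repository layout, the likely invocation from the repository root is:

    python -m unittest test.common.ssh_client.test_kubernetes_client -v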
diff --git a/test/common/ssh_client/test_local_client.py b/test/common/ssh_client/test_local_client.py
new file mode 100644
index 00000000..b946c50e
--- /dev/null
+++ b/test/common/ssh_client/test_local_client.py
@@ -0,0 +1,425 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/07/22
+@file: test_local_client.py
+@desc:
+"""
+
+import unittest
+import subprocess32 as subprocess
+from unittest.mock import patch, MagicMock
+from common.ssh_client.local_client import LocalClient
+from context import HandlerContext
+
+
+class TestLocalClient(unittest.TestCase):
+    def test_init_with_context_and_node(self):
+        """
+        Test the initialization process when passing `context` and `node`.
+        """
+
+        # Create an instance of HandlerContext for testing how the `context` parameter is handled during initialization.
+        context = HandlerContext()
+
+        # Create an empty dictionary to test how the `node` parameter is handled during initialization.
+        node = {}
+
+        # Initialize a LocalClient instance with the provided `context` and `node`.
+        client = LocalClient(context=context, node=node)
+
+        # Assert that the `context` attribute of `client` is equal to the passed-in `context`.
+        self.assertEqual(client.context, context)
+
+        # Assert that the `node` attribute of `client` is equal to the passed-in `node`.
+        self.assertEqual(client.node, node)
+
+    def test_init_with_only_node(self):
+        """
+        Test the initialization behavior when only providing a node.
+
+        This test case aims to verify that when passing `None` as the context and a node dictionary to `LocalClient`,
+        they are correctly assigned to their respective attributes.
+        """
+
+        # Initialize an empty dictionary as the node
+        node = {}
+
+        # Initialize `LocalClient` with `None` as the context and the previously defined node
+        client = LocalClient(context=None, node=node)
+
+        # Verify that the `context` attribute of `client` is `None`
+        self.assertIsNone(client.context)
+
+        # Verify that the `node` attribute of `client` matches the passed-in `node`
+        self.assertEqual(client.node, node)
+
+    def test_init_with_only_context(self):
+        """
+        Test initialization when only the context is passed.
+
+        This test case checks if the initialization raises the expected exception when only the context is provided and other necessary parameters are missing.
+        It verifies that object creation is correctly prevented when the initialization conditions are not fully met.
+ + Parameters: + - context (HandlerContext): An instance of HandlerContext representing the event handling context. + + Returns: + - No return value, but raises an AttributeError to test the robustness of the initialization process. + """ + context = HandlerContext() + self.assertRaises(AttributeError, LocalClient, context, None) + + def test_init_with_no_args(self): + """Tests initialization without passing any parameters""" + # Attempt to instantiate LocalClient without arguments to verify if it raises an AttributeError + self.assertRaises(AttributeError, LocalClient, None, None) + + def setUp(self): + """ + Set up the environment before executing test cases. + + This method initializes necessary components for test cases by creating an instance of `HandlerContext`, + an empty node dictionary, and mocking the standard input/output and client of the `LocalClient`. + + :param self: The instance of the class that this method is part of. + """ + + # Create an instance of HandlerContext to simulate the testing environment's context + context = HandlerContext() + + # Create an empty dictionary as the node object, which will be used to simulate data storage in tests + node = {} + + # Initialize a LocalClient instance using the context and node, simulating local client operations + self.local_client = LocalClient(context=context, node=node) + + # Mock the standard input/output of LocalClient to avoid actual I/O operations during tests + self.local_client.stdio = MagicMock() + + # Mock the client attribute of LocalClient to avoid actual client connections during tests + self.local_client.client = MagicMock() + + @patch('subprocess.Popen') + def test_exec_cmd_success(self, mock_popen): + """ + Test the exec_cmd command successfully and return standard output. + + :param mock_popen: A mocked version of subprocess.Popen for testing purposes. + """ + + # Create a mock process object + mock_process = MagicMock() + + # Set up the communicate method's return value to simulate stdout and stderr + mock_process.communicate.return_value = (b"stdout output", b"") + + # Set the return value of the mocked popen to be the mock process + mock_popen.return_value = mock_process + + # Call the function under test + result = self.local_client.exec_cmd("echo 'Hello World'") + + # Verify the results of the function call + # Assert that the returned result matches the expected output + self.assertEqual(result, "stdout output") + + # Verify that the verbose method was called with the correct logging information + self.local_client.stdio.verbose.assert_called_with("[local host] run cmd = [echo 'Hello World'] on localhost") + + @patch('subprocess.Popen') + def test_exec_cmd_failure(self, mock_popen): + """ + Tests the exec_cmd command when it fails and returns the stderr output. + + This test simulates a failure scenario for the exec_cmd command by mocking the popen object. + It checks whether the exec_cmd command handles failures correctly and returns the expected error message. + + Parameters: + - mock_popen: A parameter used to mock the popen object for testing failure scenarios. + + Returns: + No return value; this method primarily performs assertion checks. 
+ """ + + # Create a mocked popen object to simulate a failed command execution + mock_process = MagicMock() + mock_process.communicate.return_value = (b"", b"stderr output") + mock_popen.return_value = mock_process + + # Call the function under test + result = self.local_client.exec_cmd("exit 1") + + # Verify that the function execution result matches the expected outcome, i.e., the correct error message is returned + self.assertEqual(result, "stderr output") + + # Verify that the log information was recorded correctly during command execution + self.local_client.stdio.verbose.assert_called_with("[local host] run cmd = [exit 1] on localhost") + + @patch('subprocess.Popen') + def test_exec_cmd_exception(self, mock_popen): + """ + Test the exec_cmd command in exceptional scenarios. + + This test sets up a scenario where the `popen` method raises an exception, + and checks if `exec_cmd` handles it correctly. + + Parameters: + - mock_popen: A mock object to simulate the behavior of popen, which will raise an exception. + + Raises: + Exception: If the `exec_cmd` does not handle the exception properly. + """ + + # Configure the mock_popen to raise an exception when called + mock_popen.side_effect = Exception("Popen error") + + # Execute the function being tested, expecting it to raise an exception + with self.assertRaises(Exception) as context: + self.local_client.exec_cmd("exit 1") + + # Verify the exception message contains the expected text + self.assertIn("Execute Shell command failed", str(context.exception)) + + # Ensure the error log is recorded as expected + self.local_client.stdio.error.assert_called_with("run cmd = [exit 1] on localhost, Exception = [Popen error]") + + @patch('common.ssh_client.local_client.shutil.copy') + def test_download_success(self, mock_copy): + """ + Test the successful scenario of the download command. + + This test case simulates a successful file download and verifies the following: + - The download method was called. + - The download method was called correctly once. + - In the case of a successful download, the error message method was not called. + + Parameters: + - mock_copy: A mocked copy method used to replace the actual file copying operation in the test. + + Returns: + None + """ + + # Define remote and local file paths + remote_path = "/path/to/remote/file" + local_path = "/path/to/local/file" + + # Call the download method under test + self.local_client.download(remote_path, local_path) + + # Verify that mock_copy was called correctly once + mock_copy.assert_called_once_with(remote_path, local_path) + + # Verify that the error message method was not called + self.local_client.stdio.error.assert_not_called() + + @patch('common.ssh_client.local_client.shutil.copy') + def test_download_failure(self, mock_copy): + """ + Tests the failure scenario of the download command. + + :param mock_copy: A mock object to simulate the copy operation and its failure. 
+ """ + + # Set up the mock object to raise an exception to simulate a failure during the download process + mock_copy.side_effect = Exception('copy error') + + # Define the remote and local file paths + remote_path = "/path/to/remote/file" + local_path = "/path/to/local/file" + + # Execute the download operation, expecting it to fail and raise an exception + with self.assertRaises(Exception) as context: + self.local_client.download(remote_path, local_path) + + # Verify that the exception message contains the expected text + self.assertTrue("download file from localhost" in str(context.exception)) + + # Verify that the error message was recorded correctly + self.local_client.stdio.error.assert_called_once() + + @patch('common.ssh_client.local_client.shutil.copy') + def test_upload_success(self, mock_copy): + """ + Tests the successful scenario of the upload command. + + This test case simulates a successful file upload and verifies if the upload process calls methods correctly. + + Parameters: + - mock_copy: A mock object used to simulate the file copy operation. + """ + + # Define remote and local file paths + remote_path = '/tmp/remote_file.txt' + local_path = '/tmp/local_file.txt' + + # Call the function under test for uploading + self.local_client.upload(remote_path, local_path) + + # Verify if mock_copy was called once with the correct parameters + mock_copy.assert_called_once_with(local_path, remote_path) + + # Verify if error messages were not called, ensuring no errors occurred during the upload + self.local_client.stdio.error.assert_not_called() + + @patch('common.ssh_client.local_client.shutil.copy') + def test_upload_failure(self, mock_copy): + """ + Test the upload command failure. + + :param mock_copy: A mocked copy operation that simulates an upload. + """ + + # Simulate an exception to test the failure scenario of the upload + mock_copy.side_effect = Exception('copy error') + + # Define remote and local file paths + remote_path = '/tmp/remote_file.txt' + local_path = '/tmp/local_file.txt' + + # Call the function under test and expect it to raise an exception + with self.assertRaises(Exception) as context: + self.local_client.upload(remote_path, local_path) + + # Verify the exception message matches the expected one + self.assertIn('upload file to localhost', str(context.exception)) + + # Verify that the error message was output through stdio.error + self.local_client.stdio.error.assert_called_once() + + @patch('subprocess.Popen') + def test_ssh_invoke_shell_switch_user_success(self, mock_popen): + """ + Test the ssh_invoke_shell_switch_user command executing successfully and returning standard output. + + Parameters: + mock_popen: A mocked popen object to simulate the subprocess behavior. 
+ + Returns: + None + """ + + # Create a mock process object + mock_process = MagicMock() + + # Set up the communicate method's return value to simulate command execution output + mock_process.communicate.return_value = (b"successful output", b"") + + # Set up the mock_popen method to return the mock process object + mock_popen.return_value = mock_process + + # Call the function under test + result = self.local_client.ssh_invoke_shell_switch_user("new_user", 'echo "Hello World"', 10) + + # Verify if the function was called correctly and the return value matches the expected output + self.assertEqual(result, "successful output") + + # Verify if stdio.verbose was called once appropriately + self.local_client.stdio.verbose.assert_called_once() + + # Verify if mock_popen was called with the expected parameters + mock_popen.assert_called_once_with("su - new_user -c 'echo \"Hello World\"'", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash') + + @patch('subprocess.Popen') + def test_ssh_invoke_shell_switch_user_failure(self, mock_popen): + """ + Tests the ssh_invoke_shell_switch_user command failure and returns standard output. + + :param mock_popen: A mocked popen object for testing purposes. + :return: None + """ + + # Create a mock process object + mock_process = MagicMock() + + # Set up the communicate method of the mock process to return error output + mock_process.communicate.return_value = (b"", b"error output") + + # Set up the mock_popen to return the mock process object + mock_popen.return_value = mock_process + + # Call the function under test + result = self.local_client.ssh_invoke_shell_switch_user("new_user", 'echo "Hello World"', 10) + + # Verify that the method is called correctly + self.assertEqual(result, "error output") + + # Verify stdio.verbose was called once + self.local_client.stdio.verbose.assert_called_once() + + # Verify mock_popen was called with the correct parameters + mock_popen.assert_called_once_with("su - new_user -c 'echo \"Hello World\"'", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash') + + @patch('subprocess.Popen') + def test_ssh_invoke_shell_switch_user_exception(self, mock_popen): + """ + Test the ssh_invoke_shell_switch_user command under exceptional circumstances. + + :param mock_popen: A mock object for the popen method to simulate failure scenarios. + """ + + # Set up the mock_popen to raise an exception, simulating a Popen operation failure. + mock_popen.side_effect = Exception("Popen error") + + # Call the function under test and expect it to raise an exception. + with self.assertRaises(Exception) as context: + self.local_client.ssh_invoke_shell_switch_user("new_user", "echo 'Hello World'", 10) + + # Verify that the exception message contains the expected error message. + self.assertTrue("the client type is not support ssh invoke shell switch user" in str(context.exception)) + + # Ensure that the error logging method was called once. + self.local_client.stdio.error.assert_called_once() + + def test_get_name(self): + """Test getting the name of the SSH client.""" + + # Retrieve the name by calling the get_name method on self.local_client + name = self.local_client.get_name() + # Assert that the method was called correctly and the returned name matches the expected "local" + self.assertEqual(name, "local") + + def test_get_ip(self): + """Test the IP retrieval functionality of the SSH client. 
+ + This test case verifies the correctness of the IP address retrieved through the SSH client. + It sets an expected IP address and then calls the `get_ip` method to obtain the actual IP address, + comparing it with the expected one. Additionally, it ensures that the `get_ip` method is called + exactly once. + + Parameters: + None + + Returns: + None + """ + + # Set the expected IP address + expected_ip = '127.0.0.1' + + # Mock the client.get_ip method to return the expected IP address + self.local_client.client.get_ip.return_value = expected_ip + + # Call the tested function to get the IP + ip = self.local_client.get_ip() + + # Assert that the retrieved IP matches the expected IP + self.assertEqual(ip, expected_ip) + + # Assert that the client.get_ip method was called exactly once + self.local_client.client.get_ip.assert_called_once() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/common/ssh_client/test_remote_client.py b/test/common/ssh_client/test_remote_client.py new file mode 100644 index 00000000..584ee763 --- /dev/null +++ b/test/common/ssh_client/test_remote_client.py @@ -0,0 +1,405 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/07/25 +@file: test_remote_client.py +@desc: +""" + +import unittest +from io import StringIO +from unittest.mock import patch, MagicMock +from common.ssh_client.remote_client import RemoteClient +from paramiko.ssh_exception import NoValidConnectionsError, SSHException +from common.obdiag_exception import OBDIAGSSHConnException, OBDIAGShellCmdException + + +class TestRemoteClient(unittest.TestCase): + + @patch('paramiko.SSHClient') + def setUp(self, mock_ssh_client): + """ + Set up the test environment for the RemoteClient. + + :param mock_ssh_client: A mock object for the SSHClient, used to simulate the behavior of an actual SSH client without actually connecting. + """ + + # Create a mock context object with a stdio attribute + self.context = MagicMock() + self.context.stdio = MagicMock() + + # Assuming 'self.node' is a dictionary with all necessary keys including 'ssh_type'. + self.node = {"ip": "192.168.1.1", "ssh_username": "user", "ssh_port": 22, "ssh_password": "password", "ssh_key_file": "/path/to/key", "ssh_type": "remote"} + + # Mock the SSHClient to avoid actual connection + mock_ssh_client_instance = mock_ssh_client.return_value + mock_ssh_client_instance.connect.return_value = None + + # Create a remote client object and mock its SSH file descriptor + self.remote_client = RemoteClient(self.context, self.node) + self.remote_client._ssh_fd = mock_ssh_client_instance + + @patch('common.ssh_client.remote_client.paramiko.SSHClient') + @patch('common.ssh_client.remote_client.paramiko.client.AutoAddPolicy') + def test_init_with_key_file(self, mock_auto_add_policy, mock_ssh_client): + """ + Test that the key file path is correctly expanded during initialization. 
+ + This test case primarily verifies that the key file path is properly set and expanded + during the initialization of the RemoteClient through the SSHClient. + Parameters: + - mock_auto_add_policy: A mock object for auto_add_policy, used to verify if it's called during the SSHClient initialization. + - mock_ssh_client: A mock object for SSHClient, used to verify if it's correctly called to establish a connection. + """ + + # Use patch to mock os.path.expanduser behavior for testing path expansion. + with patch('common.ssh_client.remote_client.os.path.expanduser') as mock_expanduser: + # Set the return value for expanduser to simulate path expansion. + mock_expanduser.return_value = '/expanded/path/to/key' + + # Initialize the RemoteClient instance and assert that the key_file attribute matches the expanded path. + remote_client = RemoteClient(self.context, self.node) + self.assertEqual(remote_client.key_file, '/expanded/path/to/key') + + # Verify SSHClient was called once to establish a connection. + mock_ssh_client.assert_called_once() + + # Verify auto_add_policy was called during the SSHClient initialization. + mock_auto_add_policy.assert_called_once() + + @patch('common.ssh_client.remote_client.paramiko.SSHClient') + @patch('common.ssh_client.remote_client.paramiko.client.AutoAddPolicy') + def test_init_without_key_file(self, mock_auto_add_policy, mock_ssh_client): + """ + Tests initialization without a key file. + + Parameters: + self: Instance of the class. + mock_auto_add_policy: Mock object for auto add policy. + mock_ssh_client: Mock object for the SSH client. + + Returns: + None + """ + + # Set the node's ssh_key_file to an empty string to simulate no key file provided. + self.node["ssh_key_file"] = "" + + # Initialize the RemoteClient object with context and node information. + remote_client = RemoteClient(self.context, self.node) + + # Assert that the key_file attribute of the RemoteClient object is an empty string. + self.assertEqual(remote_client.key_file, "") + + # Verify that SSHClient was called to establish a connection. + mock_ssh_client.assert_called_once() + + # Verify that auto add policy was called to handle connection policies. + mock_auto_add_policy.assert_called_once() + + @patch('common.ssh_client.remote_client.paramiko.SSHClient') + @patch('common.ssh_client.remote_client.paramiko.client.AutoAddPolicy') + def test_init_stores_expected_attributes(self, mock_auto_add_policy, mock_ssh_client): + """ + Test that initialization stores the expected attributes. + + Avoid actual connection by mocking the SSHClient.connect method. + """ + + # Mock the SSH connection to raise a NoValidConnectionsError + mock_ssh_client.return_value.connect.side_effect = NoValidConnectionsError(errors={'192.168.1.1': ['Mocked error']}) + + # Expect an OBDIAGSSHConnException to be raised when the SSH connection is invalid + with self.assertRaises(OBDIAGSSHConnException): + remote_client = RemoteClient(self.context, self.node) + + def test_exec_cmd_success(self): + """ + Test setup and validation for successful command execution. + + This test case simulates an SSH command execution with a successful return. + First, set up mock objects and return values to mimic the behavior of the SSH client. + Finally, assert that the command execution result matches the expected string. 
+ """ + + # Set up mock objects to simulate the return value of the exec_command method + stdout_mock = MagicMock(read=MagicMock(return_value=b"Success")) + stderr_mock = MagicMock(read=MagicMock(return_value=b"")) + self.remote_client._ssh_fd.exec_command.return_value = (None, stdout_mock, stderr_mock) + + # Define a command to be executed, which simply outputs "Success" + cmd = "echo 'Success'" + + # Execute the command and retrieve the result + result = self.remote_client.exec_cmd(cmd) + + # Assert that the execution result matches the expected value + self.assertEqual(result, "Success") + + def test_exec_cmd_failure(self): + """ + Tests the scenario when a command execution fails. + + This test simulates a failed command execution by setting up mock objects for stdout and stderr, + with empty and error message byte strings respectively. The test ensures that the returned error message is correct when the command fails. + """ + + # Set up mock objects for stdout and stderr return values + stdout_mock = MagicMock(read=MagicMock(return_value=b"")) + stderr_mock = MagicMock(read=MagicMock(return_value=b"Error")) + + # Mock the exec_command method's return value to simulate a failed command execution + self.remote_client._ssh_fd.exec_command.return_value = (None, stdout_mock, stderr_mock) + + # Define a command that will produce an error + cmd = "echo 'Error'" + + # Execute the command and catch the exception + with self.assertRaises(Exception): + self.remote_client.exec_cmd(cmd) + + def test_exec_cmd_ssh_exception(self): + """ + Setup: Prepare for testing in an environment where SSH exceptions occur. + + Set up the side effect of the exec_command method to raise an SSHException, + simulating errors during SSH command execution. + """ + self.remote_client._ssh_fd.exec_command.side_effect = SSHException("SSH Error") + cmd = "echo 'Test'" + + # Test & Assert: When exec_command raises an SSHException, exec_cmd should raise an OBDIAGShellCmdException. + # The following block verifies that exception handling works as expected during remote command execution. 
+ with self.assertRaises(OBDIAGShellCmdException): + self.remote_client.exec_cmd(cmd) + + @patch('paramiko.SFTPClient.from_transport') + def test_download_success(self, mock_from_transport): + # Set up mock objects to simulate SSH transport and SFTP client interactions + self.remote_client._ssh_fd.get_transport = MagicMock(return_value=MagicMock()) + self.remote_client._sftp_client = MagicMock() + self.remote_client.stdio = MagicMock() + self.remote_client.stdio.verbose = MagicMock() + self.remote_client.progress_bar = MagicMock() + self.remote_client.host_ip = "192.168.1.1" + + # Define remote and local paths for testing the download functionality + remote_path = '/remote/path/file.txt' + local_path = '/local/path/file.txt' + + # Configure the mock object to return the mocked SFTP client + mock_from_transport.return_value = self.remote_client._sftp_client + + # Call the download method and verify its behavior + self.remote_client.download(remote_path, local_path) + + # Verify that the get method was called once with the correct parameters during the download process + self.remote_client._sftp_client.get.assert_called_once_with(remote_path, local_path, callback=self.remote_client.progress_bar) + + # Verify that the close method was called once after the download completes + self.remote_client._sftp_client.close.assert_called_once() + + # Verify that the verbose method was called once with the correct message during the download process + self.remote_client.stdio.verbose.assert_called_once_with('Download 192.168.1.1:/remote/path/file.txt') + + @patch('paramiko.SFTPClient.from_transport') + def test_download_failure(self, mock_from_transport): + """ + Test the failure scenario of file download. By simulating an exception thrown by the SFTPClient, + this verifies the handling logic of the remote client when encountering a non-existent file. + + Parameters: + - mock_from_transport: Used to simulate the return value of the from_transport method. 
+ """ + + # Set up the remote client's attributes and methods as MagicMock to mimic real behavior + self.remote_client._ssh_fd.get_transport = MagicMock(return_value=MagicMock()) + self.remote_client._sftp_client = MagicMock() + self.remote_client.stdio = MagicMock() + self.remote_client.stdio.verbose = MagicMock() + self.remote_client.progress_bar = MagicMock() + self.remote_client.host_ip = "192.168.1.1" + + # Define the remote and local file paths + remote_path = '/remote/path/file.txt' + local_path = '/local/path/file.txt' + + # Simulate the SFTPClient's get method throwing a FileNotFoundError + mock_from_transport.return_value = self.remote_client._sftp_client + self.remote_client._sftp_client.get.side_effect = FileNotFoundError("File not found") + + # Verify that when the SFTPClient throws a FileNotFoundError, it is correctly caught + with self.assertRaises(FileNotFoundError): + self.remote_client.download(remote_path, local_path) + + # Confirm that the get method was called once with the correct parameters + self.remote_client._sftp_client.get.assert_called_once_with(remote_path, local_path, callback=self.remote_client.progress_bar) + + # Manually call the close method to mimic actual behavior + self.remote_client._sftp_client.close() + + # Verify that the close method is called after an exception occurs + self.remote_client._sftp_client.close.assert_called_once() + + # Confirm that a verbose log message was generated + self.remote_client.stdio.verbose.assert_called_once_with('Download 192.168.1.1:/remote/path/file.txt') + + @patch('sys.stdout', new_callable=StringIO) + def test_progress_bar(self, mock_stdout): + """ + Tests the progress bar display. + + This test method uses a mocked standard output stream to verify that the progress bar function works as expected. + Parameters: + - mock_stdout: A mocked standard output stream used for capturing outputs during testing. + """ + + # Setup test data: 1KB has been transferred, and a total of 1MB needs to be transferred + transferred = 1024 # 1KB + to_be_transferred = 1048576 # 1MB + + # Set the suffix for the progress bar, used for testing + suffix = 'test_suffix' + + # Set the length of the progress bar + bar_len = 20 + + # Calculate the filled length of the progress bar + filled_len = int(round(bar_len * transferred / float(to_be_transferred))) + + # Generate the progress bar string: green-filled part + unfilled part + bar = '\033[32;1m%s\033[0m' % '=' * filled_len + '-' * (bar_len - filled_len) + + # Call the function under test: update the progress bar + self.remote_client.progress_bar(transferred, to_be_transferred, suffix) + + # Flush the standard output to prepare for checking the output + mock_stdout.flush() + + # Construct the expected output string + expected_output = 'Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m0.0\033[0m', '% [', self.remote_client.translate_byte(transferred), ']', suffix) + + # Verify that the output contains the expected output string + self.assertIn(expected_output, mock_stdout.getvalue()) + + @patch('sys.stdout', new_callable=StringIO) + def test_progress_bar_complete(self, mock_stdout): + """ + Test the completion of the progress bar. + + This test case verifies the display of the progress bar when the transfer is complete. + Parameters: + - mock_stdout: A mock object used to capture standard output for verifying the output content. 
+ """ + + # Set up parameters for file size and progress bar + transferred = 1048576 # 1MB + to_be_transferred = 1048576 # 1MB + suffix = 'test_suffix' + bar_len = 20 + + # Calculate the filled length of the progress bar + filled_len = int(round(bar_len * transferred / float(to_be_transferred))) + + # Construct the progress bar string + bar = '\033[32;1m%s\033[0m' % '=' * filled_len + '-' * (bar_len - filled_len) + + # Call the function under test + self.remote_client.progress_bar(transferred, to_be_transferred, suffix) + mock_stdout.flush() + + # Expected output content + expected_output = 'Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m100.0\033[0m', '% [', self.remote_client.translate_byte(transferred), ']', suffix) + + # Verify that the output is as expected + self.assertIn(expected_output, mock_stdout.getvalue()) + self.assertIn('\r\n', mock_stdout.getvalue()) + + @patch('common.ssh_client.remote_client.paramiko') + def test_upload(self, mock_paramiko): + """ + Set up the SSH transport object and SFTP client object. + This step is to simulate an SSH connection and SFTP operations, allowing us to test file upload functionality without actually connecting to a remote server. + """ + + # Initialize the SSH transport object and SFTP client object for simulation purposes. + transport = MagicMock() + sftp_client = MagicMock() + mock_paramiko.SFTPClient.from_transport.return_value = sftp_client + self.remote_client._ssh_fd.get_transport.return_value = transport + + # Perform the upload operation by specifying the remote and local paths. + remote_path = '/remote/path/file' + local_path = '/local/path/file' + self.remote_client.upload(remote_path, local_path) + + # Verify that the SFTP put method was called with the correct parameters. + sftp_client.put.assert_called_once_with(local_path, remote_path) + + # Verify that the SFTP client was closed correctly after the upload operation. 
sftp_client.close.assert_called_once()
+
+    @patch('time.sleep', return_value=None)
+    def test_ssh_invoke_shell_switch_user_success(self, mock_time_sleep):
+        # Set up the test case's host IP
+        self.remote_client.host_ip = 'fake_host'
+
+        # Setup mock response
+        expected_result = "Command executed successfully"
+
+        # Mock the invoke_shell method to return the expected result in bytes
+        self.remote_client._ssh_fd.invoke_shell = MagicMock(return_value=MagicMock(recv=MagicMock(return_value=expected_result.encode('utf-8'))))
+
+        # Mock the close method to return None
+        self.remote_client._ssh_fd.close = MagicMock(return_value=None)
+
+        # Test the function
+        result = self.remote_client.ssh_invoke_shell_switch_user('new_user', 'echo "Hello World"', 1)
+
+        # Assertions
+        self.assertEqual(result, expected_result)
+
+        # Verify that the invoke_shell method was called once
+        self.remote_client._ssh_fd.invoke_shell.assert_called_once()
+
+        # Verify that the close method was called once
+        self.remote_client._ssh_fd.close.assert_called_once()
+
+    @patch('time.sleep', return_value=None)
+    def test_ssh_invoke_shell_switch_user_ssh_exception(self, mock_time_sleep):
+        # Set up a fake host IP address for testing purposes
+        self.remote_client.host_ip = 'fake_host'
+
+        # Configure the mock to raise an SSHException when invoke_shell is called
+        self.remote_client._ssh_fd.invoke_shell = MagicMock(side_effect=SSHException)
+
+        # Test the function and expect it to raise an OBDIAGShellCmdException
+        with self.assertRaises(OBDIAGShellCmdException):
+            self.remote_client.ssh_invoke_shell_switch_user('new_user', 'echo "Hello World"', 1)
+
+        # Assert that invoke_shell was called exactly once
+        self.remote_client._ssh_fd.invoke_shell.assert_called_once()
+
+        # Assert that close was not called on the SSH connection during the exception
+        self.remote_client._ssh_fd.close.assert_not_called()
+
+    def test_get_name(self):
+        # Call the get_name method on the remote client to retrieve the name
+        name = self.remote_client.get_name()
+
+        # Assert that the retrieved name matches the expected value "remote_192.168.1.1"
+        self.assertEqual(name, "remote_192.168.1.1")
+
+
+if __name__ == '__main__':
+    unittest.main()

From 91c0b6b6d99c5cfb41e7300fe354d3577104e935 Mon Sep 17 00:00:00 2001
From: YSevenK <102002644+YSevenK@users.noreply.github.com>
Date: Wed, 7 Aug 2024 11:16:13 +0800
Subject: [PATCH 45/68] unittest for common/scene.py (#371)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* unittest for scene.py

* unittest for common/scene.py

* Generate unit tests for command and config_helper under the common package

* Improve the test class comments in test/common/test_command.py

* fix test bug

* fix bug

* fix test_command bug

* fix bug:test_config_helper

* fix bugs.add unittest workflow.reformat

* Delete test/__init__.py
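
To verify the new suite locally, one can presumably run the same discovery
command that the workflow below uses (assuming the dependencies from
requirements3.txt are installed):

    python -m unittest discover -s test/common -p 'test_*.py'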
---------

Co-authored-by: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
---
 .github/workflows/test_command_scene_configHelper.yml |  31 +++
 test/common/test_command.py                           | 219 ++++++++++++++++++
 test/common/test_config_helper.py                     | 156 +++++++++++++
 test/common/test_scene.py                             | 145 ++++++++++++
 4 files changed, 551 insertions(+)
 create mode 100644 .github/workflows/test_command_scene_configHelper.yml
 create mode 100644 test/common/test_command.py
 create mode 100644 test/common/test_config_helper.py
 create mode 100644 test/common/test_scene.py

diff --git a/.github/workflows/test_command_scene_configHelper.yml b/.github/workflows/test_command_scene_configHelper.yml
new file mode 100644
index 00000000..e5647e0a
---
+++ b/.github/workflows/test_command_scene_configHelper.yml
@@ -0,0 +1,31 @@
+# Test cases for command, scene, and config_helper under the common package
+name: Test command_scene_configHelper
+
+on:
+  push:
+    branches: "*"
+  pull_request:
+    branches: "*"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0 # Fetch all history for proper version detection
+
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v3
+      with:
+        python-version: 3.8
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements3.txt
+
+    - name: Run tests
+      run: python -m unittest discover -s test/common -p 'test_*.py'
diff --git a/test/common/test_command.py b/test/common/test_command.py
new file mode 100644
index 00000000..ac78f06e
---
+++ b/test/common/test_command.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/08/06
+@file: test_command.py
+@desc: Tests for common/command.py, covering methods up to delete_file_in_folder
+"""
+import unittest
+from unittest.mock import Mock, patch
+import subprocess
+from common.command import *
+
+
+class TestLocalClient(unittest.TestCase):
+    def setUp(self):
+        self.stdio = Mock()
+        self.local_client = LocalClient(stdio=self.stdio)
+        self.ssh_client = Mock()
+
+    @patch('subprocess.Popen')
+    def test_run_success(self, mock_popen):
+        # Simulate a successful command execution
+        mock_process = Mock()
+        mock_process.communicate.return_value = (b'success', None)
+        mock_popen.return_value = mock_process
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run(cmd)
+
+        # Verify the verbose and Popen calls
+        self.stdio.verbose.assert_called_with("[local host] run cmd = [echo \"hello\"] on localhost")
+        mock_popen.assert_called_with(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash')
+
+        # Verify the result
+        self.assertEqual(result, b'success')
+
+    @patch('subprocess.Popen')
+    def test_run_failure(self, mock_popen):
+        # Simulate a failed command execution
+        mock_process = Mock()
+        mock_process.communicate.return_value = (b'', b'error')
+        mock_popen.return_value = mock_process
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run(cmd)
+
+        # Verify the verbose and Popen calls
+        self.stdio.verbose.assert_called_with("[local host] run cmd = [echo \"hello\"] on localhost")
+        mock_popen.assert_called_with(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash')
+
+        # Verify the error handling
+        self.stdio.error.assert_called_with("run cmd = [echo \"hello\"] on localhost, stderr=[b'error']")
+        self.assertEqual(result, b'')
+
+    @patch('subprocess.Popen')
+    def test_run_exception(self, mock_popen):
+        # Simulate an exception being raised during command execution
+        mock_popen.side_effect = Exception('Test exception')
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run(cmd)
+
+        # Verify the verbose call and the exception handling
+        self.stdio.verbose.assert_called_with("[local host] run cmd = [echo \"hello\"] on localhost")
+        self.stdio.error.assert_called_with("run cmd = [echo \"hello\"] on localhost")
+        self.assertIsNone(result)
+
+    @patch('subprocess.Popen')
+    def test_run_get_stderr_success(self, mock_popen):
+        # Simulate a successful command execution
+        mock_process = Mock()
+        mock_process.communicate.return_value = (b'success', b'')
+        mock_popen.return_value = mock_process
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run_get_stderr(cmd)
+
+        # Verify the verbose and Popen calls
+        self.stdio.verbose.assert_called_with("run cmd = [echo \"hello\"] on localhost")
+        mock_popen.assert_called_with(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash')
+
+        # Verify the result
+        self.assertEqual(result, b'')
+
+    @patch('subprocess.Popen')
+    def test_run_get_stderr_failure(self, mock_popen):
+        # Simulate a failed command execution
+        mock_process = Mock()
+        mock_process.communicate.return_value = (b'', b'error')
+        mock_popen.return_value = mock_process
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run_get_stderr(cmd)
+
+        # Verify the verbose and Popen calls
+        self.stdio.verbose.assert_called_with("run cmd = [echo \"hello\"] on localhost")
+        mock_popen.assert_called_with(cmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True, executable='/bin/bash')
+
+        # Verify the error handling
+        # Since stdout is b'' and stderr is b'error', the result should be b'error'
+        self.assertEqual(result, b'error')
+
+        # Check whether the error method was called, and whether it was called with the correct content
+        # Note: under normal circumstances the error method should not be called; it is only called on exceptions.
+        # Make sure the error method is only called in exceptional cases
+        self.stdio.error.assert_not_called()
+
+    @patch('subprocess.Popen')
+    def test_run_get_stderr_exception(self, mock_popen):
+        # Simulate an exception being raised during command execution
+        mock_popen.side_effect = Exception('Test exception')
+
+        cmd = 'echo "hello"'
+        result = self.local_client.run_get_stderr(cmd)
+
+        # Verify the verbose call and the exception handling
+        self.stdio.verbose.assert_called_with("run cmd = [echo \"hello\"] on localhost")
+        self.stdio.error.assert_called_with(f"run cmd = [{cmd}] on localhost")
+        self.assertIsNone(result)
+
+    def test_download_file_success(self):
+        remote_path = "/remote/path/file.txt"
+        local_path = "/local/path/file.txt"
+
+        result = download_file(self.ssh_client, remote_path, local_path, self.stdio)
+
+        self.ssh_client.download.assert_called_once_with(remote_path, local_path)
+        self.assertEqual(result, local_path)
+        self.stdio.error.assert_not_called()
+        self.stdio.verbose.assert_not_called()
+
+    def test_download_file_failure(self):
+        remote_path = "/remote/path/file.txt"
+        local_path = "/local/path/file.txt"
+
+        self.ssh_client.download.side_effect = Exception("Simulated download exception")
+
+        result = download_file(self.ssh_client, remote_path, local_path, self.stdio)
+
+        self.ssh_client.download.assert_called_once_with(remote_path, local_path)
+        self.assertEqual(result, local_path)
+        self.stdio.error.assert_called_once_with("Download File Failed error: Simulated download exception")
+        self.stdio.verbose.assert_called_once()
+
+    def test_upload_file_success(self):
+        local_path = "/local/path/file.txt"
+        remote_path = "/remote/path/file.txt"
+        self.ssh_client.get_name.return_value = "test_server"
+
+        result = upload_file(self.ssh_client, local_path, remote_path, self.stdio)
+
+        self.ssh_client.upload.assert_called_once_with(remote_path, local_path)
+        self.stdio.verbose.assert_called_once_with("Please wait a moment, upload file to server test_server, local file path /local/path/file.txt, remote file path /remote/path/file.txt")
+        self.stdio.error.assert_not_called()
+
+    def test_rm_rf_file_success(self):
+        dir_path = "/path/to/delete"
+
+        rm_rf_file(self.ssh_client, dir_path, self.stdio)
+
+        self.ssh_client.exec_cmd.assert_called_once_with("rm -rf /path/to/delete")
+
+    def test_rm_rf_file_empty_dir(self):
+        dir_path = ""
+
+        rm_rf_file(self.ssh_client, dir_path, self.stdio)
+
+        
self.ssh_client.exec_cmd.assert_called_once_with("rm -rf ") + + def test_rm_rf_file_special_chars(self): + dir_path = "/path/to/delete; echo 'This is a test'" + + rm_rf_file(self.ssh_client, dir_path, self.stdio) + + self.ssh_client.exec_cmd.assert_called_once_with("rm -rf /path/to/delete; echo 'This is a test'") + + def test_delete_file_in_folder_success(self): + file_path = "/path/to/gather_pack" + + delete_file_in_folder(self.ssh_client, file_path, self.stdio) + + self.ssh_client.exec_cmd.assert_called_once_with("rm -rf /path/to/gather_pack/*") + + def test_delete_file_in_folder_none_path(self): + file_path = None + + with self.assertRaises(Exception) as context: + delete_file_in_folder(self.ssh_client, file_path, self.stdio) + + self.assertTrue("Please check file path, None" in str(context.exception)) + + def test_delete_file_in_folder_invalid_path(self): + file_path = "/path/to/invalid_folder" + + with self.assertRaises(Exception) as context: + delete_file_in_folder(self.ssh_client, file_path, self.stdio) + + self.assertTrue("Please check file path, /path/to/invalid_folder" in str(context.exception)) + + def test_delete_file_in_folder_special_chars(self): + file_path = "/path/to/gather_pack; echo 'test'" + + delete_file_in_folder(self.ssh_client, file_path, self.stdio) + + self.ssh_client.exec_cmd.assert_called_once_with("rm -rf /path/to/gather_pack; echo 'test'/*") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/common/test_config_helper.py b/test/common/test_config_helper.py new file mode 100644 index 00000000..0137dd73 --- /dev/null +++ b/test/common/test_config_helper.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+
+"""
+@time: 2024/8/6
+@file: test_config_helper.py
+@desc: Tests for config_helper, covering the methods from get_old_configuration to input_choice_default
+"""
+import unittest
+from unittest import mock
+from common.config_helper import ConfigHelper
+
+
+class TestConfigHelper(unittest.TestCase):
+    @mock.patch('common.config_helper.YamlUtils.write_yaml_data')
+    @mock.patch('common.config_helper.DirectoryUtil.mkdir')
+    @mock.patch('common.config_helper.os.path.expanduser')
+    @mock.patch('common.config_helper.TimeUtils.timestamp_to_filename_time')
+    def test_save_old_configuration(self, mock_timestamp_to_filename_time, mock_expanduser, mock_mkdir, mock_write_yaml_data):
+        # Mock the timestamp generation function to return a fixed value
+        mock_timestamp_to_filename_time.return_value = '20240806_123456'
+
+        # Mock the path expansion function
+        def mock_expanduser_path(path):
+            return {'~/.obdiag/config.yml': '/mock/config.yml', '~/mock/backup/dir': '/mock/backup/dir'}.get(path, path)  # Return the original path by default
+
+        mock_expanduser.side_effect = mock_expanduser_path
+
+        # Mock the directory creation function
+        mock_mkdir.return_value = None
+
+        # Mock the YAML data writing function
+        mock_write_yaml_data.return_value = None
+
+        # Create a mock context object
+        context = mock.MagicMock()
+        context.inner_config = {"obdiag": {"basic": {"config_backup_dir": "~/mock/backup/dir"}}}
+
+        # Initialize a ConfigHelper object
+        config_helper = ConfigHelper(context)
+
+        # Define a sample configuration
+        sample_config = {'key': 'value'}
+
+        # Call the method under test
+        config_helper.save_old_configuration(sample_config)
+
+        # Verify that path expansion was called correctly
+        mock_expanduser.assert_any_call('~/.obdiag/config.yml')
+        mock_expanduser.assert_any_call('~/mock/backup/dir')
+
+        # Verify that directory creation was called correctly
+        mock_mkdir.assert_called_once_with(path='/mock/backup/dir')
+
+        # Verify that the YAML data writing was called correctly
+        expected_backup_path = '/mock/backup/dir/config_backup_20240806_123456.yml'
+        mock_write_yaml_data.assert_called_once_with(sample_config, expected_backup_path)
+
+    # Test the input method that has a default value
+    @mock.patch('builtins.input')
+    def test_input_with_default(self, mock_input):
+        # Create a mock context object (although this method does not need it)
+        context = mock.Mock()
+        config_helper = ConfigHelper(context)
+
+        # Test the case where the user input is empty
+        mock_input.return_value = ''
+        result = config_helper.input_with_default('username', 'default_user')
+        self.assertEqual(result, 'default_user')
+
+        # Test the case where the user inputs 'y' (the default value should be returned)
+        mock_input.return_value = 'y'
+        result = config_helper.input_with_default('username', 'default_user')
+        self.assertEqual(result, 'default_user')
+
+        # Test the case where the user inputs 'yes' (the default value should be returned)
+        mock_input.return_value = 'yes'
+        result = config_helper.input_with_default('username', 'default_user')
+        self.assertEqual(result, 'default_user')
+
+        # Test the case where the user inputs another value (the user input should be returned)
+        mock_input.return_value = 'custom_user'
+        result = config_helper.input_with_default('username', 'default_user')
+        self.assertEqual(result, 'custom_user')
+
+    # Test the password input method that has a default value
+    @mock.patch('common.config_helper.pwinput.pwinput')
+    def test_input_password_with_default(self, mock_pwinput):
+        # Create a mock context object
+        context = mock.MagicMock()
+        config_helper = ConfigHelper(context)
+
+        # Test the case where the password input is empty; the default value should be returned
+        mock_pwinput.return_value = ''
+        result = config_helper.input_password_with_default("password", "default_password")
+        self.assertEqual(result, "default_password")
+
+        # Test the case where the password input is 'y'; the default value should be returned
+        mock_pwinput.return_value = 'y'
+        result = config_helper.input_password_with_default("password", "default_password")
+        self.assertEqual(result, "default_password")
+
+        # Test the case where the password input is 'yes'; the default value should be returned
+        mock_pwinput.return_value = 'yes'
+        result = config_helper.input_password_with_default("password", "default_password")
+        self.assertEqual(result, "default_password")
+
+        # Test the case where the password input is another value; the input value should be returned
+        mock_pwinput.return_value = 'custom_password'
+        result = config_helper.input_password_with_default("password", "default_password")
+        self.assertEqual(result, "custom_password")
+
+    # Test the choice input method that has a default option
+    @mock.patch('common.config_helper.input')
+    def test_input_choice_default(self, mock_input):
+        # Create a mock context object
+        context = mock.MagicMock()
+        config_helper = ConfigHelper(context)
+
+        # Test the case where the input is 'y'; True should be returned
+        mock_input.return_value = 'y'
+        result = config_helper.input_choice_default("choice", "N")
+        self.assertTrue(result)
+
+        # Test the case where the input is 'yes'; True should be returned
+        mock_input.return_value = 'yes'
+        result = config_helper.input_choice_default("choice", "N")
+        self.assertTrue(result)
+
+        # Test the case where the input is 'n'; False should be returned
+        mock_input.return_value = 'n'
+        result = config_helper.input_choice_default("choice", "N")
+        self.assertFalse(result)
+
+        # Test the case where the input is 'no'; False should be returned
+        mock_input.return_value = 'no'
+        result = config_helper.input_choice_default("choice", "N")
+        self.assertFalse(result)
+
+        # Test the case where the input is an empty string; False should be returned
+        mock_input.return_value = ''
+        result = config_helper.input_choice_default("choice", "N")
+        self.assertFalse(result)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/common/test_scene.py b/test/common/test_scene.py
new file mode 100644
index 00000000..21ad57d3
---
+++ b/test/common/test_scene.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/8/6
+@file: test_scene.py
+@desc: Unit tests for the filter_by_version and get_version_by_type functions in the scene module
+"""
+import unittest
+from unittest.mock import MagicMock, patch
+from common.scene import *
+
+
+class TestFilterByVersion(unittest.TestCase):
+    def setUp(self):
+        self.stdio = MagicMock()
+        StringUtils.compare_versions_greater = MagicMock()
+        self.context = MagicMock()
+        self.context.stdio = MagicMock()
+
+    def test_no_version_in_cluster(self):
+        scene = [{"version": "[1.0,2.0]"}]
+        cluster = {}
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_empty_version_in_cluster(self):
+        scene = [{"version": "[1.0,2.0]"}]
+        cluster = {"version": ""}
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_version_not_string(self):
+        scene = [{"version": 123}]
+        cluster = {"version": "1.5"}
+        with self.assertRaises(Exception):
+            filter_by_version(scene, cluster, self.stdio)
+
+    def test_version_match_min(self):
+        scene = [{"version": "[1.0,2.0]"}]
+        cluster = {"version": "1.0"}
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_version_match_max(self):
+        scene = [{"version": "[1.0,2.0]"}]
+        cluster = {"version": "2.0"}
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_version_in_range(self):
+        scene = [{"version": "[1.0,2.0]"}]
+        cluster = {"version": "1.5"}
+        StringUtils.compare_versions_greater.side_effect = [True, True]
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_version_out_of_range(self):
+        scene = [{"version": "[1.0,2.0]"}, {"version": "[2.0,3.0]"}]
+        cluster = {"version": "2.5"}
+        StringUtils.compare_versions_greater.side_effect = [False, True, True, True]
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 1)
+
+    def test_no_version_in_steps(self):
+        scene = [{}]
+        cluster = {"version": "1.0"}
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_no_matching_version(self):
+        scene = [{"version": "[1.0,2.0]"}, {"version": "[2.0,3.0]"}]
+        cluster = {"version": "3.5"}
+        StringUtils.compare_versions_greater.return_value = False
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, -1)
+
+    def test_wildcard_min_version(self):
+        scene = [{"version": "[*,2.0]"}]
+        cluster = {"version": "1.0"}
+        StringUtils.compare_versions_greater.side_effect = [True, True]
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    def test_wildcard_max_version(self):
+        scene = [{"version": "[1.0,*]"}]
+        cluster = {"version": "3.0"}
+        StringUtils.compare_versions_greater.side_effect = [True, True]
+        result = filter_by_version(scene, cluster, self.stdio)
+        self.assertEqual(result, 0)
+
+    @patch('common.scene.get_observer_version')
+    def test_get_observer_version(self, mock_get_observer_version):
+        mock_get_observer_version.return_value = "1.0.0"
+        result = get_version_by_type(self.context, "observer")
+        self.assertEqual(result, "1.0.0")
+        mock_get_observer_version.assert_called_once_with(self.context)
+
+    @patch('common.scene.get_observer_version')
+    def test_get_other_version(self, mock_get_observer_version):
+        mock_get_observer_version.return_value = "2.0.0"
+        result = get_version_by_type(self.context, "other")
+        self.assertEqual(result, "2.0.0")
+        mock_get_observer_version.assert_called_once_with(self.context)
+
+
+    @patch('common.scene.get_observer_version')
+    def test_get_observer_version_fail(self, mock_get_observer_version):
+        mock_get_observer_version.side_effect = Exception("Observer error")
+        with self.assertRaises(Exception) as context:
+            get_version_by_type(self.context, "observer")
+        self.assertIn("can't get observer version", str(context.exception))
+        self.context.stdio.warn.assert_called_once()
+
+    @patch('common.scene.get_obproxy_version')
+    def test_get_obproxy_version(self, mock_get_obproxy_version):
+        mock_get_obproxy_version.return_value = "3.0.0"
+        result = get_version_by_type(self.context, "obproxy")
+        self.assertEqual(result, "3.0.0")
+        mock_get_obproxy_version.assert_called_once_with(self.context)
+
+    def test_unsupported_type(self):
+        with self.assertRaises(Exception) as context:
+            get_version_by_type(self.context, "unsupported")
+        self.assertIn("No support to get the version", str(context.exception))
+
+    @patch('common.scene.get_observer_version')
+    def test_general_exception_handling(self, mock_get_observer_version):
+        mock_get_observer_version.side_effect = Exception("Unexpected error")
+        with self.assertRaises(Exception) as context:
+            get_version_by_type(self.context, "observer")
+        self.assertIn("can't get observer version", str(context.exception))
+        self.context.stdio.exception.assert_called_once()
+
+
+if __name__ == '__main__':
+    unittest.main()

From 9d8e7e262a4a66fe95778462458daf91ecc1aaac Mon Sep 17 00:00:00 2001
From: xiaodong-ji
Date: Mon, 12 Aug 2024 17:39:52 +0800
Subject: [PATCH 46/68] Feature input parameters support kv (#386)

* add unittest for ssh_client

* unittest for local_client

* add unittest for remote_client

* add unittest for docker_client and kubernetes_client

* update unittest for remote_client

* add unittest workflow

* update unittest for test_remote_client

* update unittest for workflow

* input_parameters supports input in kv format

* Delete temporary files

* Using opts to pass input_parameters options

* fix multiple = in options

* delete print()

* Change the warn prompt information to verbose
---
 diag_cmd.py                              | 57 +++++++++++++++++++++++-
 handler/rca/rca_handler.py               | 11 +----
 handler/rca/scene/ddl_disk_full_scene.py |  6 ++-
 3 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/diag_cmd.py b/diag_cmd.py
index ccca161d..1ffa72f0 100644
--- a/diag_cmd.py
+++ b/diag_cmd.py
@@ -22,6 +22,7 @@
 import sys
 import textwrap
 import re
+import json
 from uuid import uuid1 as uuid, UUID
 from optparse import OptionParser, BadOptionError, Option, IndentedHelpFormatter
 from core import ObdiagHome
@@ -867,8 +868,61 @@ def __init__(self):
         super(ObdiagRCARunCommand, self).__init__('run', 'root cause analysis')
         self.parser.add_option('--scene', type='string', help="rca scene name. The argument is required.")
         self.parser.add_option('--store_dir', type='string', help='the dir to store rca result, current dir by default.', default='./rca/')
-        self.parser.add_option('--input_parameters', type='string', help='input parameters of scene')
+        self.parser.add_option('--input_parameters', action='callback', type='string', callback=self._input_parameters_scene, help='input parameters of scene')
         self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
+        self.scene_input_param_map = {}
+
+    def _input_parameters_scene(self, option, opt_str, value, parser):
+        """
+        Parse the --input_parameters option of a scene: either a JSON string or a dotted key=value pair.
+        """
+        try:
+            # input_parameters option is json format
+            try:
+                self.scene_input_param_map = json.loads(value)
+                return
+            except Exception as e:
+                # raise Exception("Failed to parse input_parameters. Please check the option is json:{0}".format(value))
+                ROOT_IO.verbose("input_parameters option {0} is not json.".format(value))
+
+            # input_parameters option is key=val format
+            key, val = value.split('=', 1)
+            if key is None or key == "":
+                return
+            m = self._input_parameters_scene_set(key, val)
+
+            def _scene_input_param(param_map, scene_param_map):
+                for scene_param_map_key, scene_param_map_value in scene_param_map.items():
+                    if scene_param_map_key in param_map:
+                        if isinstance(scene_param_map_value, dict):
+                            _scene_input_param(param_map[scene_param_map_key], scene_param_map_value)
+                        else:
+                            param_map[scene_param_map_key] = scene_param_map_value
+                    else:
+                        param_map[scene_param_map_key] = scene_param_map_value
+                return param_map
+
+            self.scene_input_param_map = _scene_input_param(self.scene_input_param_map, m)
+        except Exception as e:
+            raise Exception("Key or val ({1}) is illegal: {0}".format(e, value))
+
+    def _input_parameters_scene_set(self, key, val):
+        def recursion(param_map, key, val):
+            if key is None or key == "":
+                raise Exception("key is None")
+            if val is None or val == "":
+                raise Exception("val is None")
+            if key.startswith(".") or key.endswith("."):
+                raise Exception("Key starts or ends '.'")
+            if "." in key:
+                map_key = key.split(".")[0]
+                param_map[map_key] = recursion({}, key[len(map_key) + 1 :], val)
+                return param_map
+            else:
+                param_map[key] = val
+                return param_map
+
+        return recursion({}, key, val)
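+
+    # Editorial sketch of the behavior above (the helper names are real, the
+    # values are made up for illustration): a dotted key is expanded into
+    # nested dicts, so
+    #     _input_parameters_scene_set("a.b.c", "1")  ->  {"a": {"b": {"c": "1"}}}
+    # Repeated key=value --input_parameters options are merged into
+    # self.scene_input_param_map by _input_parameters_scene, while a JSON
+    # value replaces the whole map.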
 
     def init(self, cmd, args):
         super(ObdiagRCARunCommand, self).init(cmd, args)
         return self
 
@@ -876,6 +930,7 @@ def _do_command(self, obdiag):
+        Util.set_option(self.opts, 'input_parameters', self.scene_input_param_map)
         return obdiag.rca_run(self.opts)
 
diff --git a/handler/rca/rca_handler.py b/handler/rca/rca_handler.py
index 993bd173..4699c37d 100644
--- a/handler/rca/rca_handler.py
+++ b/handler/rca/rca_handler.py
@@ -113,7 +113,6 @@ def __init__(self, context):
         all_scenes_info, all_scenes_item = rca_list.get_all_scenes()
         self.context.set_variable("rca_deep_limit", len(all_scenes_info))
         self.all_scenes = all_scenes_item
-        self.rca_scene_parameters = None
         self.rca_scene = None
         self.cluster = self.context.get_variable("ob_cluster")
         self.nodes = self.context.get_variable("observer_nodes")
@@ -122,15 +121,7 @@ def __init__(self, context):
         # init input parameters
         self.report = None
         self.tasks = None
-        rca_scene_parameters = Util.get_option(self.options, "input_parameters", "")
-        if rca_scene_parameters != "":
-            try:
-                rca_scene_parameters = json.loads(rca_scene_parameters)
-            except Exception as e:
-                raise Exception("Failed to parse input_parameters. Please check the option is json:{0}".format(rca_scene_parameters))
-        else:
-            rca_scene_parameters = {}
-        self.context.set_variable("input_parameters", rca_scene_parameters)
+        self.context.set_variable("input_parameters", Util.get_option(self.options, "input_parameters"))
         self.store_dir = Util.get_option(self.options, "store_dir", "./rca/")
         self.context.set_variable("store_dir", self.store_dir)
         self.stdio.verbose(
diff --git a/handler/rca/scene/ddl_disk_full_scene.py b/handler/rca/scene/ddl_disk_full_scene.py
index 31da106a..71c5d90d 100644
--- a/handler/rca/scene/ddl_disk_full_scene.py
+++ b/handler/rca/scene/ddl_disk_full_scene.py
@@ -132,7 +132,11 @@ def execute(self):
         ## if the action is add_index
         sql = "select table_id from oceanbase.__all_virtual_table_history where tenant_id = '{0}' and data_table_id = '{1}' and table_name like '%{2}%';".format(self.tenant_id, self.table_id, self.index_name)
         self.verbose("execute_sql is {0}".format(sql))
-        self.index_table_id = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall()[0]["table_id"]
+        sql_tables_data = self.ob_connector.execute_sql_return_cursor_dictionary(sql).fetchall()
+        if len(sql_tables_data) == 0:
+            self.stdio.error("can not find index table id by index name: {0}. Please check the index name.".format(self.index_name))
+            return
+        self.index_table_id = sql_tables_data[0]["table_id"]
         self.verbose("index_table_id is {0}".format(self.index_table_id))
         self.record.add_record("index_table_id is {0}".format(self.index_table_id))
 

From 2a80ff501cd0f2e3faed351c22b578d157c856ca Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Tue, 13 Aug 2024 19:22:03 +0800
Subject: [PATCH 47/68] Remove duplicate scripts (#389)

* update version to 2.4.0

* Remove duplicate scripts
---
 init.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/init.sh b/init.sh
index 766b82d5..2c007a5d 100755
--- a/init.sh
+++ b/init.sh
@@ -25,10 +25,6 @@ if [ -d "${WORK_DIR}/gather" ]; then
     cp -rf ${WORK_DIR}/gather ${OBDIAG_HOME}/
 fi
 
-if [ -d "${WORK_DIR}/gather" ]; then
-    cp -rf ${WORK_DIR}/gather ${OBDIAG_HOME}/
-fi
-
 if [ -d "${WORK_DIR}/example" ]; then
     cp -rf ${WORK_DIR}/example ${OBDIAG_HOME}/
 fi

From 6138999cc25a442fb212d22682901f2de3111caa Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Wed, 21 Aug 2024 14:10:13 +0800
Subject: [PATCH 48/68] 2.4.0 silent print (#388)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update io: add stderr

* update io: add stderr

* add --inner_config add print_type

* support remote_client_sudo

* build test package

* fix --inner_config

* fix io stream

* test

* test

* test

* test

* test

* Cancel packaging for the test branch

* test

* test

* test

* test

* test

* add new io buffer

* add new io buffer

* build test package

* silent change

* delete init.sh cp gather dir more

* update

* update

* update

* update

* update

* update

* update

* update

* fix some bug

* delete build tag

* delete silent note

* code fix
---
 common/command.py                             | 16 ++--
 common/config_helper.py                       |  2 +-
 common/ssh_client/base.py                     |  1 -
 common/ssh_client/remote_client.py            |  1 -
 common/tool.py                                | 16 ++--
 conf/inner_config.yml                         |  1 +
 config.py                                     |  1 +
 core.py                                       | 68 ++++++++------
 diag_cmd.py                                   | 45 ++++++++--
 handler/analyzer/analyze_flt_trace.py         | 18 +++-
 handler/analyzer/analyze_log.py               | 11 ++-
 handler/analyzer/analyze_parameter.py         | 19 ++--
 handler/analyzer/analyze_sql.py               |  9 +-
 handler/analyzer/analyze_variable.py          | 10 ++-
 handler/checker/check_handler.py              |  3 +-
handler/checker/check_list.py | 86 ++++++++++-------- .../checker/tasks/observer/bugs/bug_385.yaml | 14 +++ .../tasks/observer/cluster/task_opt_stat.yaml | 16 ++++ .../tasks/observer/system/parameter.yaml | 2 +- handler/gather/gather_ash_report.py | 8 +- handler/gather/gather_awr.py | 4 +- handler/gather/gather_log.py | 3 +- handler/gather/gather_obadmin.py | 6 +- handler/gather/gather_obproxy_log.py | 7 +- handler/gather/gather_obstack2.py | 6 +- handler/gather/gather_parameters.py | 5 +- handler/gather/gather_perf.py | 6 +- handler/gather/gather_plan_monitor.py | 8 +- handler/gather/gather_scenes.py | 10 ++- handler/gather/gather_sysstat.py | 6 +- handler/gather/gather_tabledump.py | 7 +- handler/gather/gather_variables.py | 5 +- handler/gather/scenes/list.py | 23 +++-- handler/rca/rca_handler.py | 13 ++- handler/rca/rca_list.py | 6 +- handler/rca/scene/lock_conflict_scene.py | 4 +- init.sh | 2 +- result_type.py | 45 ++++++++++ stdio.py | 90 ++++++++++++++----- update/update.py | 29 +++--- 40 files changed, 449 insertions(+), 183 deletions(-) create mode 100644 handler/checker/tasks/observer/bugs/bug_385.yaml create mode 100644 handler/checker/tasks/observer/cluster/task_opt_stat.yaml create mode 100644 result_type.py diff --git a/common/command.py b/common/command.py index 915d0f60..8d838198 100644 --- a/common/command.py +++ b/common/command.py @@ -286,7 +286,8 @@ def get_obproxy_version(context): obproxy_version_info = ssh_client.exec_cmd(cmd) stdio.verbose("get obproxy version, run cmd = [{0}] ".format(cmd)) if obproxy_version_info is not None: - ob_version = re.findall(r'[(]OceanBase.(.+? +?)[)]', obproxy_version_info) + pattern = r"(\d+\.\d+\.\d+\.\d+)" + ob_version = re.findall(pattern, obproxy_version_info) if len(ob_version) > 0: return ob_version[0] else: @@ -295,7 +296,6 @@ def get_obproxy_version(context): stdio.verbose("get obproxy version with LD_LIBRARY_PATH,cmd:{0}, result:{1}".format(cmd, obproxy_version_info)) if "REVISION" not in obproxy_version_info: raise Exception("Please check conf about proxy,{0}".format(obproxy_version_info)) - pattern = r"(\d+\.\d+\.\d+\.\d+)" match = re.search(pattern, obproxy_version_info) if match: obproxy_version_info = match.group(1) @@ -405,12 +405,12 @@ def is_empty_file(ssh_client, file_path, stdio=None): return False -def get_obdiag_display(log_dir, trace_id, stdio=None): - cmd = 'grep -h "\[{}\]" {}* | sed "s/\[{}\] //g" '.format(trace_id, log_dir, trace_id) - stdout = LocalClient(stdio).run(cmd) - print_stdout = str(stdout).replace('\\n', '\n').replace('\\t', '\t') - if len(print_stdout) > 0: - print(print_stdout) +# def get_obdiag_display(log_dir, trace_id, stdio=None): +# cmd = 'grep -h "\[{}\]" {}* | sed "s/\[{}\] //g" '.format(trace_id, log_dir, trace_id) +# stdout = LocalClient(stdio).run(cmd) +# print_stdout = str(stdout).replace('\\n', '\n').replace('\\t', '\t') +# if len(print_stdout) > 0: +# print(print_stdout) def uzip_dir_local(uzip_dir, stdio=None): diff --git a/common/config_helper.py b/common/config_helper.py index 4fcc55ed..a27e5ccd 100644 --- a/common/config_helper.py +++ b/common/config_helper.py @@ -98,7 +98,7 @@ def build_configuration(self): self.save_old_configuration(old_config) # rewrite config ob_cluster_name = self.get_cluster_name() - print("\033[33mPlease enter the following configuration !!!\033[0m") + self.stdio.print("\033[33mPlease enter the following configuration !!!\033[0m") global_ssh_username = self.input_with_default("oceanbase host ssh username", "") global_ssh_password = 
self.input_password_with_default("oceanbase host ssh password", "") global_ssh_port = self.input_with_default("oceanbase host ssh_port", "22") diff --git a/common/ssh_client/base.py b/common/ssh_client/base.py index 870f73b5..16021e7c 100644 --- a/common/ssh_client/base.py +++ b/common/ssh_client/base.py @@ -62,7 +62,6 @@ def progress_bar(self, transferred, to_be_transferred, suffix=''): sys.stdout.write('Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m%s\033[0m' % print_percents, '% [', self.translate_byte(transferred), ']', suffix)) if transferred == to_be_transferred: sys.stdout.write('Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m%s\033[0m' % print_percents, '% [', self.translate_byte(transferred), ']', suffix)) - print() def translate_byte(self, B): if B < 0: diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py index c17c874b..d61c0821 100644 --- a/common/ssh_client/remote_client.py +++ b/common/ssh_client/remote_client.py @@ -113,7 +113,6 @@ def progress_bar(self, transferred, to_be_transferred, suffix=''): sys.stdout.write('Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m%s\033[0m' % print_percents, '% [', self.translate_byte(transferred), ']', suffix)) if transferred == to_be_transferred: sys.stdout.write('Downloading [%s] %s%s%s %s %s\r' % (bar, '\033[32;1m%s\033[0m' % print_percents, '% [', self.translate_byte(transferred), ']', suffix)) - print() def upload(self, remote_path, local_path): transport = self._ssh_fd.get_transport() diff --git a/common/tool.py b/common/tool.py index 0e9921a2..0ab14007 100644 --- a/common/tool.py +++ b/common/tool.py @@ -1358,26 +1358,26 @@ def convert_to_number(s, stdio=None): return s @staticmethod - def print_scene(scene_dict, stdio=None): + def print_scene(scene_dict, stdio): columns_to_print = ['command', 'info_en', 'info_cn'] keys = columns_to_print table_data = [[value[key] for key in keys] for value in scene_dict.values()] column_widths = [max(len(str(item)) * (StringUtils.is_chinese(item) or 1) for item in column) for column in zip(*table_data)] table_data.insert(0, keys) - Util.print_line(length=sum(column_widths) + 5) + Util.print_line(length=sum(column_widths) + 5, stdio=stdio) for i in range(len(table_data)): - print(Fore.GREEN + " ".join(f"{item:<{width}}" for item, width in zip(table_data[i], column_widths)) + Style.RESET_ALL) + stdio.print(Fore.GREEN + " ".join(f"{item:<{width}}" for item, width in zip(table_data[i], column_widths)) + Style.RESET_ALL) if i == 0: - Util.print_line(length=sum(column_widths) + 5) - Util.print_line(length=sum(column_widths) + 5) + Util.print_line(length=sum(column_widths) + 5, stdio=stdio) + Util.print_line(length=sum(column_widths) + 5, stdio=stdio) @staticmethod def print_line(char='-', length=50, stdio=None): - print(char * length) + stdio.print(char * length) @staticmethod - def print_title(name, stdio=None): - print("\n[{0}]:".format(name)) + def print_title(name, stdio): + stdio.print("\n[{0}]:".format(name)) @staticmethod def gen_password(length=8, chars=string.ascii_letters + string.digits, stdio=None): diff --git a/conf/inner_config.yml b/conf/inner_config.yml index db4aa329..f241ba73 100644 --- a/conf/inner_config.yml +++ b/conf/inner_config.yml @@ -13,6 +13,7 @@ obdiag: mode: obdiag stdout_handler_log_level: INFO error_stream: sys.stdout + silent: false ssh_client: remote_client_sudo: 0 check: diff --git a/config.py b/config.py index cc2fc19f..bd32cbbe 100644 --- a/config.py +++ b/config.py @@ -74,6 +74,7 @@ 'mode': 'obdiag', 
'stdout_handler_log_level': 'INFO', 'error_stream': 'sys.stdout', + 'silent': False, }, 'ssh_client': { 'remote_client_sudo': False, diff --git a/core.py b/core.py index 3a7d996c..95cb1206 100644 --- a/core.py +++ b/core.py @@ -51,6 +51,7 @@ from handler.gather.gather_tabledump import GatherTableDumpHandler from handler.gather.gather_parameters import GatherParametersHandler from handler.gather.gather_variables import GatherVariablesHandler +from result_type import ObdiagResult from telemetry.telemetry import telemetry from update.update import UpdateHandler from colorama import Fore, Style @@ -72,10 +73,19 @@ def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config. self.set_stdio(stdio) self.context = None self.inner_config_manager = InnerConfigManager(stdio=stdio, inner_config_change_map=inner_config_change_map) + # obdiag.logger.error_stream if self.inner_config_manager.config.get("obdiag") is not None and self.inner_config_manager.config.get("obdiag").get("basic") is not None and self.inner_config_manager.config.get("obdiag").get("basic").get("print_type") is not None: stdio.set_err_stream(self.inner_config_manager.config.get("obdiag").get("logger").get("error_stream")) - + # obdiag.logger.silent + if self.inner_config_manager.config.get("obdiag") is not None and self.inner_config_manager.config.get("obdiag").get("logger") is not None and self.inner_config_manager.config.get("obdiag").get("logger").get("silent") is not None: + stdio.set_silent(self.inner_config_manager.config.get("obdiag").get("logger").get("silent")) self.set_stdio(stdio) + if config_path: + if os.path.exists(os.path.abspath(config_path)): + config_path = config_path + else: + stdio.error('The option you provided with -c: {0} is not exist.'.format(config_path)) + return self.config_manager = ConfigManager(config_path, stdio) if ( self.inner_config_manager.config.get("obdiag") is not None @@ -191,7 +201,7 @@ def gather_function(self, function_type, opt): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("{0} start ...".format(function_type)) self.set_context(function_type, 'gather', config) @@ -230,8 +240,7 @@ def gather_function(self, function_type, opt): handler_log = GatherLogHandler(self.context) handler_log.handle() handler_obproxy = GatherObProxyLogHandler(self.context) - handler_obproxy.handle() - return True + return handler_obproxy.handle() elif function_type == 'gather_sysstat': handler = GatherOsInfoHandler(self.context) return handler.handle() @@ -252,13 +261,13 @@ def gather_function(self, function_type, opt): return handler.handle() else: self._call_stdio('error', 'Not support gather function: {0}'.format(function_type)) - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='Not support gather function: {0}'.format(function_type)) def gather_obproxy_log(self, opt): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.set_context_skip_cluster_conn('gather_obproxy_log', 'gather', config) handler = GatherObProxyLogHandler(self.context) @@ -273,33 +282,34 @@ def analyze_fuction(self, function_type, opt): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return 
ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("{0} start ...".format(function_type)) if function_type == 'analyze_log': self.set_context(function_type, 'analyze', config) handler = AnalyzeLogHandler(self.context) - handler.handle() + return handler.handle() elif function_type == 'analyze_log_offline': self.set_context_skip_cluster_conn(function_type, 'analyze', config) handler = AnalyzeLogHandler(self.context) - handler.handle() + return handler.handle() elif function_type == 'analyze_flt_trace': self.set_context(function_type, 'analyze', config) handler = AnalyzeFltTraceHandler(self.context) - handler.handle() + return handler.handle() elif function_type == 'analyze_parameter_default': self.set_context(function_type, 'analyze', config) handler = AnalyzeParameterHandler(self.context, 'default') - handler.handle() + return handler.handle() elif function_type == 'analyze_parameter_diff': self.set_context_skip_cluster_conn(function_type, 'analyze', config) handler = AnalyzeParameterHandler(self.context, 'diff') - handler.handle() + return handler.handle() elif function_type == 'analyze_variable_diff': self.set_context(function_type, 'analyze', config) handler = AnalyzeVariableHandler(self.context, 'diff') - handler.handle() + return handler.handle() + # todo not support silent elif function_type == 'analyze_sql': self.set_context(function_type, 'analyze', config) handler = AnalyzeSQLHandler(self.context) @@ -310,26 +320,29 @@ def analyze_fuction(self, function_type, opt): handler.handle() else: self._call_stdio('error', 'Not support analyze function: {0}'.format(function_type)) - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='Not support analyze function: {0}'.format(function_type)) def check(self, opts): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("check start ...") self.set_context('check', 'check', config) obproxy_check_handler = None observer_check_handler = None + result_data = {} if self.context.obproxy_config.get("servers") is not None and len(self.context.obproxy_config.get("servers")) > 0: obproxy_check_handler = CheckHandler(self.context, check_target_type="obproxy") obproxy_check_handler.handle() - obproxy_check_handler.execute() + obproxy_result = obproxy_check_handler.execute() + result_data['obproxy'] = obproxy_result if self.context.cluster_config.get("servers") is not None and len(self.context.cluster_config.get("servers")) > 0: observer_check_handler = CheckHandler(self.context, check_target_type="observer") observer_check_handler.handle() - observer_check_handler.execute() + observer_result = observer_check_handler.execute() + result_data['observer'] = observer_result if obproxy_check_handler is not None: obproxy_report_path = os.path.expanduser(obproxy_check_handler.report.get_report_path()) if os.path.exists(obproxy_report_path): @@ -338,59 +351,62 @@ def check(self, opts): observer_report_path = os.path.expanduser(observer_check_handler.report.get_report_path()) if os.path.exists(observer_report_path): self.stdio.print("Check observer finished. 
For more details, please run cmd'" + Fore.YELLOW + " cat {0} ".format(observer_check_handler.report.get_report_path()) + Style.RESET_ALL + "'") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=result_data) def check_list(self, opts): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.set_offline_context('check_list', 'check_list') handler = CheckListHandler(self.context) - handler.handle() + return handler.handle() def rca_run(self, opts): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.set_context('rca_run', 'rca_run', config) try: handler = RCAHandler(self.context) handler.handle() - handler.execute() + return handler.execute() except Exception as e: self.stdio.error("rca run Exception: {0}".format(e)) self.stdio.verbose(traceback.format_exc()) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="rca run Exception: {0}".format(e)) def rca_list(self, opts): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.set_offline_context('rca_list', 'rca_list') handler = RcaScenesListHandler(context=self.context) - handler.handle() + return handler.handle() def update(self, opts): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("update start ...") self.set_offline_context('update', 'update') handler = UpdateHandler(self.context) - handler.execute() + return handler.execute() def config(self, opt): config = self.config_manager if not config: self._call_stdio('error', 'No such custum config') - return False + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.set_offline_context('config', 'config') config_helper = ConfigHelper(context=self.context) config_helper.build_configuration() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"msg": "config success"}) diff --git a/diag_cmd.py b/diag_cmd.py index 1ffa72f0..95d5dc14 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -26,6 +26,7 @@ from uuid import uuid1 as uuid, UUID from optparse import OptionParser, BadOptionError, Option, IndentedHelpFormatter from core import ObdiagHome +from result_type import ObdiagResult from stdio import IO from common.version import get_obdiag_version from telemetry.telemetry import telemetry @@ -195,6 +196,10 @@ def _mk_usage(self): class ObdiagOriginCommand(BaseCommand): OBDIAG_PATH = OBDIAG_HOME_PATH + def __init__(self, name, summary): + super().__init__(name, summary) + self.trace_id = uuid() + @property def enable_log(self): return True @@ -251,9 +256,9 @@ def do_command(self): os.makedirs(log_directory, exist_ok=True) log_path = os.path.join(log_directory, 'obdiag.log') if self.enable_log: - ROOT_IO.init_trace_logger(log_path, 'obdiag', trace_id) + ROOT_IO.init_trace_logger(log_path, 'obdiag', self.trace_id) ROOT_IO.track_limit += 1 - ROOT_IO.verbose('cmd: %s' % self.cmds) + ROOT_IO.verbose('cmd: %s' % self.prev_cmd) ROOT_IO.verbose('opts: %s' % self.opts) config_path = 
os.path.expanduser('~/.obdiag/config.yml') custom_config = Util.get_option(self.opts, 'c') @@ -263,11 +268,38 @@ def do_command(self): else: ROOT_IO.error('The option you provided with -c: {0} is a non-existent configuration file path.'.format(custom_config)) return - obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path, inner_config_change_map=self.inner_config_change_map) + obdiag = ObdiagHome(stdio=ROOT_IO, config_path=custom_config, inner_config_change_map=self.inner_config_change_map) obdiag.set_options(self.opts) obdiag.set_cmds(self.cmds) ret = self._do_command(obdiag) + exit_code = 0 + # if silent is true, print ret + if ROOT_IO.silent: + if isinstance(ret, ObdiagResult) is False: + ROOT_IO.error('The return value of the command is not ObdiagResult. Please contact the OceanBase community. The return value is: {0}'.format(ret)) + ret = ObdiagResult(code=ObdiagResult.SERVER_ERROR_CODE, error_data="The return value of the command is not ObdiagResult. Maybe the command does not support silent mode. Please contact the OceanBase community.") + ret.set_trace_id(self.trace_id) + + def args_to_str(args): + args_str = "" + for arg in args: + args_str += arg + " " + return args_str.strip() + + ret.set_command(self.prev_cmd + " " + args_to_str(self.args)) + ROOT_IO.set_silent(False) + ROOT_IO.print(ret.get_result()) + ROOT_IO.set_silent(True) + if self.has_trace: + ROOT_IO.print('Trace ID: %s' % self.trace_id) + ROOT_IO.print('If you want to view detailed obdiag logs, please run: {0} display-trace {1}'.format(obdiag_bin, self.trace_id)) telemetry.put_data() + if ROOT_IO.silent: + if ret.get_code() == ObdiagResult.SUCCESS_CODE: + return True + else: + return False + return True except NotImplementedError: ROOT_IO.exception('command \'%s\' is not implemented' % self.prev_cmd) except SystemExit: @@ -277,10 +309,6 @@ def do_command(self): except: e = sys.exc_info()[1] ROOT_IO.exception('Running Error: %s' % e) - if self.has_trace: - ROOT_IO.print('Trace ID: %s' % trace_id) - ROOT_IO.print('If you want to view detailed obdiag logs, please run: {0} display-trace {1}'.format(obdiag_bin, trace_id)) - return ret or True def _do_command(self, obdiag): raise NotImplementedError @@ -857,8 +885,7 @@ def init(self, cmd, args): def _do_command(self, obdiag): if 'list' in self.args: - obdiag.check_list(self.opts) - return + return obdiag.check_list(self.opts) return obdiag.check(self.opts) diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index cac530a1..8aa0cacf 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -28,6 +28,7 @@ from common.tool import Util from common.tool import DirectoryUtil from common.tool import FileUtil +from result_type import ObdiagResult class AnalyzeFltTraceHandler(object): @@ -86,10 +87,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data='init option failed') if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data='init config failed') local_store_parent_dir = os.path.join(self.gather_pack_dir, "obdiag_analyze_flt_result_{0}".format(TimeUtils.timestamp_to_filename_time(self.gather_timestamp))) self.stdio.verbose("Use {0} as pack dir.".format(local_store_parent_dir)) analyze_tuples = [] @@ -119,8 +120,8 @@ def handle_from_node(node): data = future.result() tree.build(data) # output
tree - self.__output(local_store_parent_dir, tree, self.output) - return analyze_tuples + result = self.__output(local_store_parent_dir, tree, self.output) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": local_store_parent_dir, "result": result}) def __handle_from_node(self, node, old_files, local_store_parent_dir): resp = {"skip": False, "error": ""} @@ -346,6 +347,15 @@ def __output(self, result_dir, tree, output_terminal=60): self.stdio.verbose('Result saved: {}'.format(os.path.abspath(filename))) last_info = "For more details, please run cmd \033[32m' cat {0} '\033[0m\n".format(filename) self.stdio.print(last_info) + result_info = "" + with open(filename, 'r', encoding='utf-8') as f: + line_nu = 0 + for line in f: + result_info += line + line_nu += 1 + if line_nu > 60: + break + return result_info def parse_file(self, file): self.stdio.verbose('parse file: {}'.format(file[1])) diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 439f7928..1794f480 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -32,6 +32,7 @@ from common.tool import FileUtil from common.tool import TimeUtils import common.ssh_client.local_client as ssh_client_local_client +from result_type import ObdiagResult class AnalyzeLogHandler(BaseShellHandler): @@ -125,10 +126,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") local_store_parent_dir = os.path.join(self.gather_pack_dir, "obdiag_analyze_pack_{0}".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) self.stdio.verbose("Use {0} as pack dir.".format(local_store_parent_dir)) analyze_tuples = [] @@ -160,7 +161,11 @@ def handle_from_node(node): FileUtil.write_append(os.path.join(local_store_parent_dir, "result_details.txt"), field_names[n] + ": " + str(summary_details_list[m][n]) + extend) last_info = "For more details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(local_store_parent_dir, "result_details.txt")) self.stdio.print(last_info) - return analyze_tuples + # get info from local_store_parent_dir+/result_details.txt + analyze_info = "" + with open(os.path.join(local_store_parent_dir, "result_details.txt"), "r", encoding="utf-8") as f: + analyze_info = f.read() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": analyze_info}) def __handle_from_node(self, node, local_store_parent_dir): resp = {"skip": False, "error": ""} diff --git a/handler/analyzer/analyze_parameter.py b/handler/analyzer/analyze_parameter.py index 26c6c31a..40e5c012 100644 --- a/handler/analyzer/analyze_parameter.py +++ b/handler/analyzer/analyze_parameter.py @@ -26,6 +26,8 @@ import datetime from colorama import Fore, Style +from result_type import ObdiagResult + class AnalyzeParameterHandler(object): def __init__(self, context, analyze_type='default'): @@ -67,14 +69,14 @@ def handle(self): if self.analyze_type == 'default': if not self.init_option_default(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") else: if not self.init_option_diff(): self.stdio.error('init option failed') - return False + return 
ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") self.stdio.verbose("Use {0} as pack dir.".format(self.export_report_path)) DirectoryUtil.mkdir(path=self.export_report_path, stdio=self.stdio) - self.execute() + return self.execute() def check_file_valid(self): with open(self.parameter_file_name, 'r') as f: @@ -167,10 +169,11 @@ def analyze_parameter_default(self): fp.write(report_default_tb.get_string() + "\n") self.stdio.print(report_default_tb.get_string()) self.stdio.print("Analyze parameter default finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": report_default_tb.get_string(), "file_name": file_name}) else: if self.parameter_file_name is None: self.stdio.error("the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="the version of OceanBase is lower than 4.2.2, an initialization parameter file must be provided to find non-default values") else: sql = '''select substr(version(),8), svr_ip,svr_port,zone,scope,TENANT_ID,name,value,section, EDIT_LEVEL, now(),'','' from GV$OB_PARAMETERS order by 5,2,3,4,7''' @@ -262,6 +265,7 @@ def alalyze_parameter_diff(self): file_name = self.export_report_path + '/parameter_diff_{0}.table'.format(date_format) fp = open(file_name, 'a+', encoding="utf8") is_empty = True + report_diff_tbs = [] for tenant, value_list in diff_parameter_dict.items(): if len(value_list) > 0: report_diff_tb = PrettyTable(["name", "diff"]) @@ -279,17 +283,20 @@ def alalyze_parameter_diff(self): fp.write(report_diff_tb.get_string() + "\n") self.stdio.print(report_diff_tb.get_string()) is_empty = False + report_diff_tbs.append(report_diff_tb.get_string()) fp.close() if not is_empty: self.stdio.print("Analyze parameter diff finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": report_diff_tbs, "store_dir": file_name}) else: self.stdio.print("Analyze parameter diff finished. All parameter settings are consistent among observers") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": "Analyze parameter diff finished. 
All parameter settings are consistent among observers"}) def execute(self): try: if self.analyze_type == 'default': - self.analyze_parameter_default() + return self.analyze_parameter_default() elif self.analyze_type == 'diff': - self.alalyze_parameter_diff() + return self.alalyze_parameter_diff() except Exception as e: self.stdio.error("parameter info analyze failed, error message: {0}".format(e)) diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py index e6ab6374..ded4f21b 100644 --- a/handler/analyzer/analyze_sql.py +++ b/handler/analyzer/analyze_sql.py @@ -30,6 +30,7 @@ from handler.analyzer.sql.meta.sys_tenant_meta import SysTenantMeta from handler.gather.gather_scenes import GatherSceneHandler from common.command import get_observer_version +from result_type import ObdiagResult class AnalyzeSQLHandler(object): @@ -208,16 +209,16 @@ def handle(self): self.start_time = time.time() if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_inner_config(): self.stdio.error('init inner config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init inner config failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") if not self.init_ob_version(): self.stdio.error('init ob version failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init ob version failed") self.init_db_connector() self.local_store_path = os.path.join(self.local_stored_parrent_path, "obdiag_analyze_sql_result_{0}_{1}.html".format(TimeUtils.timestamp_to_filename_time(self.from_timestamp), TimeUtils.timestamp_to_filename_time(self.to_timestamp))) self.stdio.print("use {0} as result store path.".format(self.local_store_path)) diff --git a/handler/analyzer/analyze_variable.py b/handler/analyzer/analyze_variable.py index e3bbc5d9..9199e77a 100644 --- a/handler/analyzer/analyze_variable.py +++ b/handler/analyzer/analyze_variable.py @@ -24,6 +24,8 @@ import datetime from colorama import Fore, Style +from result_type import ObdiagResult + class AnalyzeVariableHandler(object): def __init__(self, context, analyze_type='diff'): @@ -55,10 +57,10 @@ def __init__(self, context, analyze_type='diff'): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") self.stdio.verbose("Use {0} as pack dir.".format(self.export_report_path)) DirectoryUtil.mkdir(path=self.export_report_path, stdio=self.stdio) - self.execute() + return self.execute() def check_file_valid(self): with open(self.variable_file_name, 'r') as f: @@ -149,11 +151,13 @@ def analyze_variable(self): self.stdio.print(Fore.RED + "Since {0}, the following variables have changed:".format(last_gather_time) + Style.RESET_ALL) self.stdio.print(report_default_tb.get_string()) self.stdio.print("Analyze variables changed finished. For more details, please run cmd '" + Fore.YELLOW + " cat {0} ".format(file_name) + Style.RESET_ALL + "'") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": report_default_tb.get_string()}) else: self.stdio.print("Analyze variables changed finished. 
Since {0}, No changes in variables".format(last_gather_time)) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"result": "Since {0}, No changes in variables".format(last_gather_time)}) def execute(self): try: - self.analyze_variable() + return self.analyze_variable() except Exception as e: self.stdio.error("variable info analyze failed, error message: {0}".format(e)) diff --git a/handler/checker/check_handler.py b/handler/checker/check_handler.py index e74b9ccf..1e899562 100644 --- a/handler/checker/check_handler.py +++ b/handler/checker/check_handler.py @@ -101,7 +101,6 @@ def __init__(self, context, check_target_type="observer"): # add ssher ssher = None try: - # ssher = SshHelper(True, node.get("ip"), node.get("ssh_username"), node.get("ssh_password"), node.get("ssh_port"), node.get("ssh_key_file"), node) ssher = SshClient(context, node) except Exception as e: self.stdio.warn("StepBase get SshHelper fail on{0} ,Exception: {1}".format(node.get("ip"), e)) @@ -113,7 +112,6 @@ def __init__(self, context, check_target_type="observer"): # add OBConnectorPool try: obConnectorPool = checkOBConnectorPool(context, 3, self.cluster) - except Exception as e: self.stdio.warn("obConnector init error. Error info is {0}".format(e)) finally: @@ -243,6 +241,7 @@ def execute(self): t_report = self.execute_one(task) self.report.add_task_report(t_report) self.report.export_report() + return self.report.report_tobeMap() except CheckrReportException as e: self.stdio.error("Report error :{0}".format(e)) self.stdio.verbose(traceback.format_exc()) diff --git a/handler/checker/check_list.py b/handler/checker/check_list.py index 53ab952c..8a5d6451 100644 --- a/handler/checker/check_list.py +++ b/handler/checker/check_list.py @@ -20,6 +20,7 @@ import yaml from common.tool import Util +from result_type import ObdiagResult class CheckListHandler: @@ -29,41 +30,56 @@ def __init__(self, context): self.work_path = os.path.expanduser(self.context.inner_config["check"]["work_path"] or "~/.obdiag/check") def handle(self): - self.stdio.verbose("list check cases") - entries = os.listdir(self.work_path) - files = [f for f in entries if os.path.isfile(os.path.join(self.work_path, f))] - for file in files: - if "check_package" in file: - cases_map = {"all": {"name": "all", "command": "obdiag check", "info_en": "default check all task without filter", "info_cn": "默认执行除filter组里的所有巡检项"}} - # Obtain which files match and corresponding header files - # Using string segmentation methods - parts = file.split('_') - if len(parts) < 1: - self.stdio.warn("invalid check package name :{0} , Please don't add file, which 'check_package' in the name".format(file)) - continue - target = parts[0] - file = "{0}/{1}".format(self.work_path, file) - package_file_data = None - # read yaml file - with open(file, 'r') as f: - package_file_data = yaml.safe_load(f) - if not package_file_data or len(package_file_data) == 0: - self.stdio.warn("No data check package data :{0} ".format(file)) + try: + self.stdio.verbose("list check cases") + entries = os.listdir(self.work_path) + files = [f for f in entries if os.path.isfile(os.path.join(self.work_path, f))] + result_map = {} + for file in files: + if "check_package" in file: + cases_map = {"all": {"name": "all", "command": "obdiag check", "info_en": "default check all task without filter", "info_cn": "默认执行除filter组里的所有巡检项"}} + # Obtain which files match and corresponding header files + # Using string segmentation methods + parts = file.split('_') + if len(parts) < 1: + self.stdio.warn("invalid check 
package name :{0} , Please don't add file, which 'check_package' in the name".format(file)) continue - target = parts[0] - file = "{0}/{1}".format(self.work_path, file) - package_file_data = None - # read yaml file - with open(file, 'r') as f: - package_file_data = yaml.safe_load(f) - if not package_file_data or len(package_file_data) == 0: - self.stdio.warn("No data check package data :{0} ".format(file)) + continue + target = parts[0] + file = "{0}/{1}".format(self.work_path, file) + package_file_data = None + # read yaml file + with open(file, 'r') as f: + package_file_data = yaml.safe_load(f) + result_map[target] = {} + result_map[target]["commands"] = [] + if not package_file_data or len(package_file_data) == 0: + self.stdio.warn("No data check package data :{0} ".format(file)) continue - for package_data in package_file_data: - if package_data == "filter": + continue + package_target = target + if target == "observer": + package_target = "cases" + else: + package_target = "{0}_cases".format(target) + for package_data in package_file_data: + if package_data == "filter": + continue + package_target = target + if target == "observer": + package_target = "cases" + else: + package_target = "{0}_cases".format(target)

- cases_map[package_data] = { - "name": package_data, - "command": "obdiag check --{0}={1}".format(package_target, package_data), - "info_en": package_file_data[package_data].get("info_en") or "", - "info_cn": package_file_data[package_data].get("info_cn") or "", - } - Util.print_title("check cases about {0}".format(target)) - Util.print_scene(cases_map) + cases_map[package_data] = { + "name": package_data, + "command": "obdiag check --{0}={1}".format(package_target, package_data), + "info_en": package_file_data[package_data].get("info_en") or "", + "info_cn": package_file_data[package_data].get("info_cn") or "", + } + result_map[target]["commands"].append( + { + "name": package_data, + "command": "obdiag check --{0}={1}".format(package_target, package_data), + "info_en": package_file_data[package_data].get("info_en") or "", + "info_cn": package_file_data[package_data].get("info_cn") or "", + } + ) + Util.print_title("check cases about {0}".format(target), stdio=self.stdio) + Util.print_scene(cases_map, stdio=self.stdio) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=result_map) + except Exception as e: + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data=str(e)) diff --git a/handler/checker/tasks/observer/bugs/bug_385.yaml b/handler/checker/tasks/observer/bugs/bug_385.yaml new file mode 100644 index 00000000..255a5ece --- /dev/null +++ b/handler/checker/tasks/observer/bugs/bug_385.yaml @@ -0,0 +1,14 @@ +info: "OB version [4.2.1.0,4.2.1.3]: if a tenant has multiple root users, please consider upgrading the OceanBase version or removing the redundant users. github issue #385" +task: + - version: "[4.2.1.0,4.2.1.3]" + steps: + - type: sql + sql: "SELECT GROUP_CONCAT(TENANT_ID) AS TENANT_ID +FROM oceanbase.CDB_OB_USERS +WHERE USER_NAME = 'root' +GROUP BY TENANT_ID +HAVING COUNT(*) > 1;" + result: + set_value: TENANT_ID + verify: '[ -z "$TENANT_ID" ]' + err_msg: "tenant: #{$TENANT_ID}. These tenants have multiple root users. Please consider upgrading the OceanBase version or removing the redundant users. For bug details, see https://github.com/oceanbase/obdiag/issues/385"
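Both new checker tasks rely on the same contract: the SQL result is bound to the name declared in set_value, the verify expression is evaluated as a shell test, and err_msg is reported only when that test fails. A minimal sketch of that contract, for illustration only (the evaluation below is an assumption inferred from how these tasks are written, not obdiag's actual checker implementation):

import subprocess

def verify_step(name, value, verify_expr, err_msg):
    # Bind the set_value result as an environment variable, then evaluate
    # the verify expression as a shell test, e.g. [ -z "$TENANT_ID" ].
    rc = subprocess.run(['bash', '-c', verify_expr], env={name: value}).returncode
    if rc != 0:
        # The test failed, so the task reports its error message.
        print(err_msg.replace('#{' + name + '}', value))

# Empty result: no tenant has duplicate root users, '[ -z "$TENANT_ID" ]' passes.
verify_step('TENANT_ID', '', '[ -z "$TENANT_ID" ]', 'tenant: #{TENANT_ID} has multiple root users')
# Non-empty result: the test fails and the error message is surfaced.
verify_step('TENANT_ID', '1002,1004', '[ -z "$TENANT_ID" ]', 'tenant: #{TENANT_ID} has multiple root users')

This is also why a task that should alert when offending tenants are found must verify with -z (error on a non-empty result) rather than -n, as in the task below.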
diff --git a/handler/checker/tasks/observer/cluster/task_opt_stat.yaml b/handler/checker/tasks/observer/cluster/task_opt_stat.yaml new file mode 100644 index 00000000..7d926e82 --- /dev/null +++ b/handler/checker/tasks/observer/cluster/task_opt_stat.yaml @@ -0,0 +1,16 @@ +info: 'Check whether the optimizer statistics gathering task has been scheduled successfully for every tenant within the last day.'
+task:
  - version: "[4.2.0.0,*]"
    steps:
    - type: sql
      sql: 'SELECT GROUP_CONCAT(DISTINCT TENANT_ID)
+FROM oceanbase.__all_tenant t
+WHERE NOT EXISTS(SELECT 1
+                 FROM oceanbase.__all_virtual_task_opt_stat_gather_history h
+                 WHERE TYPE = 1
+                   AND start_time > date_sub(now(), interval 1 day)
+                   AND h.tenant_id = t.tenant_id);'
      result:
        set_value: failed_scheduler_tenant_id
        verify: '[ -z "${failed_scheduler_tenant_id}" ]'
        err_msg: "failed_scheduler_tenant_id exists. Please check the tenant_ids: #{failed_scheduler_tenant_id}" \ No newline at end of file diff --git a/handler/checker/tasks/observer/system/parameter.yaml b/handler/checker/tasks/observer/system/parameter.yaml index 38fc7fd7..3be8519f 100644 --- a/handler/checker/tasks/observer/system/parameter.yaml +++ b/handler/checker/tasks/observer/system/parameter.yaml @@ -176,7 +176,7 @@ task: set_value: parameter report_type: warning verify: "[ 6573688 -le ${parameter} ]" - err_msg: 'fs.file-max: #{parameter}. recommended: #{parameter} is ≥ 6573688.' + err_msg: 'fs.file-max: #{parameter}. recommended: ≥ 6573688.' - type: get_system_parameter parameter: fs.pipe-user-pages-soft result: diff --git a/handler/gather/gather_ash_report.py b/handler/gather/gather_ash_report.py index fc1e4eb1..6cd91510 100644 --- a/handler/gather/gather_ash_report.py +++ b/handler/gather/gather_ash_report.py @@ -22,6 +22,7 @@ from common.ob_connector import OBConnector from common.obdiag_exception import OBDIAGFormatException, OBDIAGException from common.tool import DirectoryUtil, TimeUtils, Util, StringUtils +from result_type import ObdiagResult from stdio import SafeStdio from colorama import Fore, Style @@ -60,13 +61,13 @@ def __init__(self, context, gather_pack_dir='./'): def handle(self): if not self.version_check(): self.stdio.error('version check failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="version check failed") if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") self.__init_report_path() self.execute() - self.__print_result() + return self.__print_result() def version_check(self): observer_version = "" @@ -189,3 +190,4 @@ def init_option(self): def __print_result(self): self.stdio.print(Fore.YELLOW + "\nGather ash_report results stored in this directory: {0}".format(self.report_path) + Style.RESET_ALL) self.stdio.print("") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path}) diff --git a/handler/gather/gather_awr.py b/handler/gather/gather_awr.py index bec5b9e6..ae6b60f5 100644 --- a/handler/gather/gather_awr.py +++ b/handler/gather/gather_awr.py @@ -27,6 +27,7 @@ from common.tool import Util from common.tool import TimeUtils from common.ocp import ocp_task, ocp_api +from result_type import ObdiagResult class GatherAwrHandler(object): @@ -100,7 +101,8 @@ def handle_awr_from_ocp(ocp_url, cluster_name): # 将汇总结果持久化记录到文件中 FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) - return gather_tuples, gather_pack_path_dict + # return gather_tuples, gather_pack_path_dict + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __download_report(self, store_path, name, report_id): """ diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py index 80f6cb81..5246ed16 100644 --- a/handler/gather/gather_log.py +++ b/handler/gather/gather_log.py @@ -29,6 +29,7 @@ from
common.tool import DirectoryUtil from common.tool import FileUtil from common.tool import NetUtils +from result_type import ObdiagResult class GatherLogHandler(BaseShellHandler): @@ -168,7 +169,7 @@ def handle_from_node(node): # Persist the summary results to a file FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt")) - return True + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, pack_dir_this_command, node): resp = {"skip": False, "error": "", "zip_password": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_obadmin.py b/handler/gather/gather_obadmin.py index a7c3da04..93c6ece2 100644 --- a/handler/gather/gather_obadmin.py +++ b/handler/gather/gather_obadmin.py @@ -31,6 +31,7 @@ from common.tool import DirectoryUtil from common.tool import FileUtil from common.tool import NetUtils +from result_type import ObdiagResult class GatherObAdminHandler(BaseShellHandler): @@ -116,10 +117,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") pack_dir_this_command = os.path.join(self.local_stored_path, "obdiag_gather_pack_{0}".format(TimeUtils.timestamp_to_filename_time(self.gather_timestamp))) self.stdio.verbose("Use {0} as pack dir.".format(pack_dir_this_command)) gather_tuples = [] @@ -152,6 +153,7 @@ def handle_from_node(node): FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt")) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, local_stored_path, node): resp = {"skip": False, "error": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index ee32c8b7..1550505d 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -31,6 +31,7 @@ from common.tool import FileUtil from common.tool import NetUtils from common.tool import TimeUtils +from result_type import ObdiagResult class GatherObProxyLogHandler(BaseShellHandler): @@ -136,10 +137,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") if self.is_scene: pack_dir_this_command = self.gather_pack_dir else: @@ -171,7 +172,7 @@ def handle_from_node(node): self.pack_dir_this_command = pack_dir_this_command FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt")) - return True + return 
ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, node, pack_dir_this_command): resp = {"skip": False, "error": "", "zip_password": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 9b0e8084..4a922f6b 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -32,6 +32,7 @@ from common.tool import FileUtil from common.tool import NetUtils from common.tool import StringUtils +from result_type import ObdiagResult class GatherObstack2Handler(BaseShellHandler): @@ -77,10 +78,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") if self.is_scene: pack_dir_this_command = self.local_stored_path else: @@ -104,6 +105,7 @@ def handle_from_node(node): # Persist the summary results to a file FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt")) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, local_stored_path, node): resp = {"skip": False, "error": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_parameters.py b/handler/gather/gather_parameters.py index 187fb779..359ff423 100644 --- a/handler/gather/gather_parameters.py +++ b/handler/gather/gather_parameters.py @@ -23,6 +23,8 @@ import csv from colorama import Fore, Style +from result_type import ObdiagResult + class GatherParametersHandler(object): def __init__(self, context, gather_pack_dir='./'): @@ -53,13 +55,14 @@ def __init__(self, context, gather_pack_dir='./'): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") # example of the format of pack dir for this command: (gather_pack_dir)/gather_pack_20190610123344 pack_dir_this_command = os.path.join(self.gather_pack_dir, "gather_parameters") self.stdio.verbose("Use {0} as pack dir.".format(pack_dir_this_command)) DirectoryUtil.mkdir(path=pack_dir_this_command, stdio=self.stdio) self.gather_pack_dir = pack_dir_this_command self.execute() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"gather_pack_dir": pack_dir_this_command}) def init_option(self): options = self.context.options diff --git a/handler/gather/gather_perf.py b/handler/gather/gather_perf.py index db792d3d..61d71450 100644 --- a/handler/gather/gather_perf.py +++ b/handler/gather/gather_perf.py @@ -30,6 +30,7 @@ from common.tool import FileUtil from common.tool import NetUtils from common.tool import TimeUtils +from result_type import ObdiagResult class GatherPerfHandler(BaseShellHandler): @@ -79,10 +80,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, 
error_data="init config failed") if self.is_scene: pack_dir_this_command = self.local_stored_path else: @@ -113,6 +114,7 @@ def handle_from_node(node): # Persist the summary results to a file FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) last_info = "For result details, please run cmd \033[32m' cat {0} '\033[0m\n".format(os.path.join(pack_dir_this_command, "result_summary.txt")) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, node, local_stored_path): resp = {"skip": False, "error": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_plan_monitor.py b/handler/gather/gather_plan_monitor.py index 6d248bcf..7ab0ba73 100644 --- a/handler/gather/gather_plan_monitor.py +++ b/handler/gather/gather_plan_monitor.py @@ -33,6 +33,7 @@ from common.tool import FileUtil from common.tool import TimeUtils from handler.gather.gather_tabledump import GatherTableDumpHandler +from result_type import ObdiagResult class GatherPlanMonitorHandler(object): @@ -95,10 +96,10 @@ def __init_db_connector(self): def handle(self): if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if self.is_scene: pack_dir_this_command = self.local_stored_path else: @@ -219,7 +220,8 @@ def handle_plan_monitor_from_ob(cluster_name): self.stdio.print(summary_tuples) # 将汇总结果持久化记录到文件中 FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) - return gather_tuples, gather_pack_path_dict + # return gather_tuples, gather_pack_path_dict + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __init_db_conn(self, env): try: diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index d54e2f57..55a17d91 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -18,6 +18,7 @@ import os import re +from result_type import ObdiagResult from stdio import SafeStdio import datetime from handler.gather.scenes.base import SceneBase @@ -65,19 +66,19 @@ def init_config(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") self.__init_variables() self.__init_report_path() self.__init_task_names() self.execute() if self.is_inner: result = self.__get_sql_result() - return result + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path}) else: - self.__print_result() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path}) def execute(self): try: @@ -231,3 +232,4 @@ def __get_sql_result(self): def __print_result(self): self.stdio.print(Fore.YELLOW + "\nGather scene results stored in this directory: {0}\n".format(self.report_path) + Style.RESET_ALL) + return self.report_path diff --git a/handler/gather/gather_sysstat.py b/handler/gather/gather_sysstat.py index a77dff57..f6aea7c2 100644 --- a/handler/gather/gather_sysstat.py +++ b/handler/gather/gather_sysstat.py 
@@ -29,6 +29,7 @@ from common.tool import FileUtil from common.tool import NetUtils from common.tool import TimeUtils +from result_type import ObdiagResult class GatherOsInfoHandler(BaseShellHandler): @@ -80,10 +81,10 @@ def init_option(self): def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") if not self.init_config(): self.stdio.error('init config failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") if self.is_scene: pack_dir_this_command = self.local_stored_path @@ -114,6 +115,7 @@ def handle_from_node(node): self.stdio.print(summary_tuples) # Persist the summary results to a file FileUtil.write_append(os.path.join(pack_dir_this_command, "result_summary.txt"), summary_tuples) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def __handle_from_node(self, node, local_stored_path): resp = {"skip": False, "error": "", "gather_pack_path": ""} diff --git a/handler/gather/gather_tabledump.py b/handler/gather/gather_tabledump.py index afecfd24..88ac6078 100644 --- a/handler/gather/gather_tabledump.py +++ b/handler/gather/gather_tabledump.py @@ -18,6 +18,8 @@ import os import time + +from result_type import ObdiagResult from stdio import SafeStdio from common.ob_connector import OBConnector from common.tool import StringUtils @@ -99,10 +101,12 @@ def handle(self): self.start_time = time.time() if not self.init(): self.stdio.error('init failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init failed") excute_status = self.execute() if not self.is_innner and excute_status: self.__print_result() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.store_dir}) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="execute failed") def execute(self): try: @@ -253,3 +257,4 @@ def __print_result(self): self.stdio.print("\nAnalyze SQL Summary:") self.stdio.print(table) self.stdio.print("\n") + return diff --git a/handler/gather/gather_variables.py b/handler/gather/gather_variables.py index 6c49b538..970e5ad2 100644 --- a/handler/gather/gather_variables.py +++ b/handler/gather/gather_variables.py @@ -22,6 +22,8 @@ import csv from colorama import Fore, Style + +from result_type import ObdiagResult + class GatherVariablesHandler(object): def __init__(self, context, gather_pack_dir='./'): @@ -52,12 +54,13 @@ def handle(self): if not self.init_option(): self.stdio.error('init option failed') - return False + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init option failed") pack_dir_this_command = os.path.join(self.gather_pack_dir, "gather_variables") self.stdio.verbose("Use {0} as pack dir.".format(pack_dir_this_command)) DirectoryUtil.mkdir(path=pack_dir_this_command, stdio=self.stdio) self.gather_pack_dir = pack_dir_this_command self.execute() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": pack_dir_this_command}) def init_option(self): options = self.context.options
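Every gather and analyze handler in this commit repeats the same migration: instead of returning a bare boolean, handle() returns an ObdiagResult whose data carries the payload on success and whose error_data carries the message on failure, which is what silent mode serializes in diag_cmd.py. A condensed sketch of the convention (the ObdiagResult(code, data=None, error_data=None) signature is inferred from the call sites in this patch, not quoted from result_type.py):

from result_type import ObdiagResult

def handle_sketch(init_ok, pack_dir):
    if not init_ok:
        # failure: a machine-readable code plus a human-readable error_data
        return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data='init option failed')
    # success: return the artifact location so callers need not scrape stdout
    return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={'store_dir': pack_dir})

Note that under this inferred signature the message must be passed as error_data=...; a positional second argument would be bound to data instead.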
diff --git a/handler/gather/scenes/list.py b/handler/gather/scenes/list.py index d4a970cf..099e13a8 100644 --- a/handler/gather/scenes/list.py +++ b/handler/gather/scenes/list.py @@ -17,6 +17,8 @@ """ import os + +from result_type import ObdiagResult from stdio import SafeStdio from common.tool import YamlUtils from handler.gather.scenes.register import hardcode_scene_list @@ -44,8 +46,9 @@ def handle(self): self.stdio.verbose("len of observer_tasks: {0}; len of observer_tasks: {1}; len of observer_tasks: {2};".format(len(self.observer_tasks), len(self.obproxy_tasks), len(self.other_tasks))) if (len(self.observer_tasks) + len(self.obproxy_tasks) + len(self.other_tasks)) == 0: self.stdio.error("Failed to find any tasks") + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data="Failed to find any tasks") else: - self.print_scene_data() + return self.print_scene_data() def get_all_yaml_tasks(self): try: @@ -116,18 +119,24 @@ def print_scene_data(self): sorted_observer_tasks_dict = {} sorted_obproxy_tasks_dict = {} sorted_other_tasks_dict = {} + result_data = {} if self.other_tasks: sorted_other_tasks = sorted(self.other_tasks.items(), key=lambda x: x[0]) sorted_other_tasks_dict = {k: v for k, v in sorted_other_tasks} - Util.print_title("Other Problem Gather Scenes") - Util.print_scene(sorted_other_tasks_dict) + Util.print_title("Other Problem Gather Scenes", stdio=self.stdio) + Util.print_scene(sorted_other_tasks_dict, stdio=self.stdio) + result_data["sorted_other_tasks"] = sorted_other_tasks_dict if self.obproxy_tasks: sorted_obproxy_tasks = sorted(self.obproxy_tasks.items(), key=lambda x: x[0]) sorted_obproxy_tasks_dict = {k: v for k, v in sorted_obproxy_tasks} - Util.print_title("Obproxy Problem Gather Scenes") - Util.print_scene(sorted_obproxy_tasks_dict) + Util.print_title("Obproxy Problem Gather Scenes", stdio=self.stdio) + Util.print_scene(sorted_obproxy_tasks_dict, stdio=self.stdio) + result_data["sorted_obproxy_tasks"] = sorted_obproxy_tasks_dict + if self.observer_tasks: sorted_observer_tasks = sorted(self.observer_tasks.items(), key=lambda x: x[0]) sorted_observer_tasks_dict = {k: v for k, v in sorted_observer_tasks} - Util.print_title("Observer Problem Gather Scenes") - Util.print_scene(sorted_observer_tasks_dict) + Util.print_title("Observer Problem Gather Scenes", stdio=self.stdio) + Util.print_scene(sorted_observer_tasks_dict, stdio=self.stdio) + result_data["sorted_observer_tasks"] = sorted_observer_tasks_dict + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=result_data)
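The rca_handler.py hunks below thread the scene's diagnosis records into the ObdiagResult payload through a new records_data() helper. A standalone sketch of the same filtering logic, using plain dicts in place of the record objects for illustration:

def records_data(records):
    # Mirror the helper added below: skip empty records and keep each
    # record list together with its suggestion.
    data = []
    for record in records:
        if not record.get("records"):
            continue
        data.append({"record": record["records"], "suggest": record.get("suggest")})
    return data

print(records_data([
    {"records": ["get holding_lock trans_id:1001"], "suggest": "check the holding session"},
    {"records": [], "suggest": "skipped because the record list is empty"},
]))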
diff --git a/handler/rca/rca_handler.py b/handler/rca/rca_handler.py index 4699c37d..1fe3a14e 100644 --- a/handler/rca/rca_handler.py +++ b/handler/rca/rca_handler.py @@ -33,6 +33,8 @@ from common.tool import StringUtils from colorama import Fore, Style +from result_type import ObdiagResult + class RCAHandler: def __init__(self, context): @@ -173,7 +175,7 @@ def execute(self): self.rca_scene.execute() except RCANotNeedExecuteException as e: self.stdio.warn("rca_scene.execute not need execute: {0}".format(e)) - pass + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="rca_scene.execute not need execute: {0}".format(e)) except Exception as e: raise Exception("rca_scene.execute err: {0}".format(e)) try: @@ -181,6 +183,7 @@ def execute(self): except Exception as e: raise Exception("rca_scene.export_result err: {0}".format(e)) self.stdio.print("rca finished. For more details, the result on '" + Fore.YELLOW + self.get_result_path() + Style.RESET_ALL + "' \nYou can get the suggest by '" + Fore.YELLOW + "cat " + self.get_result_path() + "/record" + Style.RESET_ALL + "'") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.get_result_path(), "record": self.rca_scene.Result.records_data()}) class RcaScene: @@ -277,6 +280,14 @@ def export(self): f.write(record.export_suggest()) f.write("\n") + def records_data(self): + records_data = [] + for record in self.records: + if record.records is None or len(record.records) == 0: + continue + records_data.append({"record": record.records, "suggest": record.suggest}) + return records_data + class RCA_ResultRecord: def __init__(self, stdio=None): diff --git a/handler/rca/rca_list.py b/handler/rca/rca_list.py index 71b94be4..bd6c3914 100644 --- a/handler/rca/rca_list.py +++ b/handler/rca/rca_list.py @@ -19,6 +19,7 @@ from common.constant import const from common.tool import DynamicLoading from common.tool import Util +from result_type import ObdiagResult class RcaScenesListHandler: @@ -62,10 +63,11 @@ def handle(self): try: self.stdio.verbose("list rca scenes") scene_info_list, scene_itme_list = self.get_all_scenes() - Util.print_scene(scene_info_list) + Util.print_scene(scene_info_list, stdio=self.stdio) + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=scene_info_list) except Exception as e: self.stdio.error("RcaScenesListHandler Exception: {0}".format(e)) - raise e + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="RcaScenesListHandler Exception: {0}".format(e)) def __find_rca_files(self): files = [] diff --git a/handler/rca/scene/lock_conflict_scene.py b/handler/rca/scene/lock_conflict_scene.py index b5a1b5d0..f05095fe 100644 --- a/handler/rca/scene/lock_conflict_scene.py +++ b/handler/rca/scene/lock_conflict_scene.py @@ -68,7 +68,7 @@ def __execute_4_2(self): trans_record.add_record("get holding_lock trans_id:{0}".format(trans_id)) holding_lock_session_id = trans_id self.stdio.verbose("get holding lock SESSION_ID by trans_id:{0}".format(trans_id)) - cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(holding_lock_session_id)) + cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}" and SESSION_ID<>0;'.format(holding_lock_session_id)) holding_lock_session_id_datas = cursor_by_trans_id.fetchall() holding_lock_session_id = "not get" self.stdio.verbose("get sql_info by holding_lock_session_id:{0}".format(holding_lock_session_id_datas)) @@ -82,7 +82,7 @@ def __execute_4_2(self): wait_lock_trans_id = OB_LOCKS_data["TRANS_ID"] trans_record.add_record("wait_lock_trans_id is {0}".format(wait_lock_trans_id)) - cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}";'.format(wait_lock_trans_id)) + cursor_by_trans_id = self.ob_connector.execute_sql_return_cursor_dictionary('select * from oceanbase.GV$OB_TRANSACTION_PARTICIPANTS where TX_ID="{0}" and SESSION_ID<>0;'.format(wait_lock_trans_id)) wait_lock_session_datas = cursor_by_trans_id.fetchall() self.stdio.verbose("get sql_info by holding_lock_session_id:{0}".format(holding_lock_session_id)) diff --git a/init.sh b/init.sh index 2c007a5d..bb4f6eec 100755 --- a/init.sh +++ b/init.sh @@ -33,7 +33,6 @@ if [ -d "${WORK_DIR}/rca" ]; then cp -rf ${WORK_DIR}/rca ${OBDIAG_HOME}/ fi - ALIAS_OBDIAG_EXIST=$(grep "alias obdiag='sh" ~/.bashrc | head -n 1) if [[ "${ALIAS_OBDIAG_EXIST}" != "" ]]; then echo "need update obdiag alias" @@ -46,3 +45,4 @@ if [ -d "${OBDIAG_HOME}/check_package.yaml" ]; then echo "${OBDIAG_HOME}/*check_package.yaml and ${OBDIAG_HOME}/tasks has been discarded. If you have made any changes to these files on your own, please transfer the relevant data to *check_package.yaml in ${OBDIAG_HOME}/check/" fi echo "Init obdiag finished" +cd -
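The new `records_data` helper above flattens an RCA scene's result records into JSON-serializable dicts, skipping empty entries. A rough, self-contained illustration of that filtering follows; `FakeRecord` is a hypothetical stand-in for the real `RCA_ResultRecord`:

```python
# Hypothetical record shape for illustration; the real class is RCA_ResultRecord.
class FakeRecord:
    def __init__(self, records, suggest):
        self.records = records
        self.suggest = suggest

def records_data(records):
    # Same shape as the records_data method above: drop records with no content.
    return [{"record": r.records, "suggest": r.suggest} for r in records if r.records]

data = records_data([FakeRecord(["step1", "step2"], "check locks"), FakeRecord([], "ignored")])
print(data)  # [{'record': ['step1', 'step2'], 'suggest': 'check locks'}]
```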
diff --git a/result_type.py b/result_type.py new file mode 100644 index 00000000..71f30b01 --- /dev/null +++ b/result_type.py @@ -0,0 +1,45 @@ +import json + + +class ObdiagResult: + # ObdiagResult is the result of obdiag. + # It contains the code and result of obdiag. + + # SERVER_ERROR_CODE(5xx) is the code of server error. + SERVER_ERROR_CODE = 500 + # INPUT_ERROR_CODE(4xx) is the code of input error. + INPUT_ERROR_CODE = 400 + # SUCCESS_CODE(200) is the code of success. + SUCCESS_CODE = 200 + + def __init__(self, code, data=None, error_data=None): + self.command = None + self.trace_id = None + self.data = data + self.error_data = error_data + if code is None: + raise TypeError("ObdiagResult code is None. Please contact the OceanBase community.") + self.code = code + if data is not None: + if isinstance(data, dict): + self.data = data + else: + raise TypeError("ObdiagResult data is not a dict. Please contact the OceanBase community.") + if error_data is not None: + if isinstance(error_data, str): + self.error_data = error_data + else: + raise TypeError("ObdiagResult error_data is not a str. Please contact the OceanBase community.") + + def set_trace_id(self, trace_id): + self.trace_id = "{0}".format(trace_id) + + def set_command(self, command): + self.command = command + + def get_result(self): + result = {"code": self.code, "data": self.data, "error_data": self.error_data, "trace_id": self.trace_id, "command": self.command} + return json.dumps(result, ensure_ascii=False) + + def get_code(self): + return self.code
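Since `result_type.py` is self-contained, its JSON envelope is easy to demonstrate. Assuming the file above is importable, a success result serializes like this; the trace id and command shown are placeholder values, and this usage sketch is not part of the patch itself:

```python
from result_type import ObdiagResult  # assumes result_type.py from this patch is on sys.path

r = ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": "./gather_pack"})
r.set_command("obdiag gather log")   # placeholder command string
r.set_trace_id("9a2f-example")       # placeholder trace id
print(r.get_result())
# {"code": 200, "data": {"store_dir": "./gather_pack"}, "error_data": null, "trace_id": "9a2f-example", "command": "obdiag gather log"}
```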
diff --git a/stdio.py b/stdio.py index bdb10105..5a192ea9 100644 --- a/stdio.py +++ b/stdio.py @@ -84,6 +84,13 @@ def flush(self): return True +class SetBufferIO(BufferIO): + + def write(self, s): + if s not in self._buffer: + return super(SetBufferIO, self).write(s) + + class SysStdin(object): NONBLOCK = False @@ -147,6 +154,7 @@ def _readline(cls): try: for line in sys.stdin: return line + return '' except IOError: return '' finally: @@ -359,7 +367,8 @@ class IO(object): WARNING_PREV = FormtatText.warning('[WARN]') ERROR_PREV = FormtatText.error('[ERROR]') - def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, root_io=None, input_stream=SysStdin, output_stream=sys.stdout, error_stream=sys.stdout): + def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, root_io=None, input_stream=SysStdin, output_stream=sys.stdout, error_stream=sys.stdout, silent=False): + self.silent = silent self.level = level self.msg_lv = msg_lv self.default_confirm = False @@ -378,12 +387,17 @@ def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, self._cur_out_obj = None self._cur_err_obj = None self._before_critical = None + self._exit_msg = "" self._output_is_tty = False self._input_is_tty = False + self._exit_buffer = SetBufferIO() self.set_input_stream(input_stream) self.set_output_stream(output_stream) self.set_err_stream(error_stream) + def set_silent(self, silent=False): + self.silent = bool(silent) + def isatty(self): if self._root_io: return self._root_io.isatty() @@ -488,8 +502,20 @@ def before_close(self): except: pass + @property + def exit_msg(self): + return self._exit_msg + + @exit_msg.setter + def exit_msg(self, msg): + self._exit_msg = msg + def _close(self): self.before_close() + self._flush_cache() + if self.exit_msg: + self.print(self.exit_msg) + self.exit_msg = "" self._flush_log() def __del__(self): @@ -533,6 +559,11 @@ def get_cur_out_obj(self): return self._root_io.get_cur_out_obj() return self._cur_out_obj + def get_exit_buffer(self): + if self._root_io: + return self._root_io.get_exit_buffer() + return self._exit_buffer + def _start_buffer_io(self): if self._root_io: return False @@ -606,6 +637,8 @@ def _stop_sync_obj(self, sync_clz, stop_type, *arg, **kwargs): return ret def start_loading(self, text, *arg, **kwargs): + if self.silent: + return True if self.sync_obj: return False self.sync_obj = self._start_sync_obj(IOHalo, lambda x: x.stop_loading('fail'), *arg, **kwargs) @@ -614,6 +647,8 @@ def stop_loading(self, stop_type, *arg, **kwargs): + if self.silent: + return True if not isinstance(self.sync_obj, IOHalo): return False if getattr(self.sync_obj, stop_type, False): @@ -677,15 +712,18 @@ def print_list(self, ary, field_names=None, exp=lambda x: x if isinstance(x, (li def read(self, msg='', blocked=False): if msg: - self._print(MsgLevel.INFO, msg) - return self.get_input_stream().read(blocked) + if self.syncing: + self.verbose(msg, end='') + else: + self._print(MsgLevel.INFO, msg, end='') + return self.get_input_stream().readline(not self.syncing and blocked) def confirm(self, msg): - msg = '%s [y/n]: ' % msg - self.print(msg, end='') if self.default_confirm: - self.verbose("default confirm: True") + self.verbose("%s and then auto confirm yes" % msg) return True + msg = '%s [y/n]: ' % msg + self.print(msg, end='') if self.isatty() and not self.syncing: while True: try: @@ -697,6 +735,7 @@ def confirm(self, msg): except Exception as e: if not e: return False + self.print(msg, end='') else: self.verbose("isatty: %s, syncing: %s, auto confirm: False" % (self.isatty(), self.syncing)) return False @@ -714,13 +753,26 @@ def _print(self, msg_lv, msg, *args, **kwargs): del kwargs['prev_msg'] else: print_msg = msg - if msg_lv == MsgLevel.ERROR: - kwargs['file'] = self.get_cur_err_obj() + if kwargs.get('_on_exit'): + kwargs['file'] = self.get_exit_buffer() + del kwargs['_on_exit'] + else: + if msg_lv == MsgLevel.ERROR: + kwargs['file'] = self.get_cur_err_obj() + else: + kwargs['file'] = self.get_cur_out_obj() + if '_disable_log' in kwargs: + enable_log = not kwargs['_disable_log'] + del kwargs['_disable_log'] + else: + enable_log = True + # if self.silent is True, do not print to the stream + if self.silent: + pass else: - kwargs['file'] = self.get_cur_out_obj() - kwargs['file'] and print(self._format(print_msg, *args), **kwargs) + kwargs['file'] and print(self._format(print_msg, *args), **kwargs) del kwargs['file'] - self.log(msg_lv, msg, *args, **kwargs) + enable_log and self.log(msg_lv, msg, *args, **kwargs) def log(self, levelno, msg, *args, **kwargs): self._cache_log(levelno, msg, *args, **kwargs) @@ -745,6 +797,12 @@ def _log(self, levelno, msg, *args, **kwargs): if self.trace_logger: self.trace_logger.log(levelno, msg, *args, **kwargs) + def _flush_cache(self): + if not self._root_io: + text = self._exit_buffer.read() + if text: + self.print(text, _disable_log=True) + def print(self, msg, *args,
**kwargs): self._print(MsgLevel.INFO, msg, *args, **kwargs) @@ -770,16 +828,6 @@ def verbose(self, msg, *args, **kwargs): return self._print(MsgLevel.VERBOSE, '%s %s' % (self._verbose_prefix, msg), *args, **kwargs) - def print_result_json(self, result): - - if not result: - return - if isinstance(result, dict): - result = json.dumps(result, indent=4) - self.print(result) - - pass - if sys.version_info.major == 2: def exception(self, msg='', *args, **kwargs): diff --git a/update/update.py b/update/update.py index 125a230b..c9b73ae6 100644 --- a/update/update.py +++ b/update/update.py @@ -26,6 +26,8 @@ from common.version import OBDIAG_VERSION import yaml +from result_type import ObdiagResult + # for update obdiag files without obdiag class UpdateHandler: @@ -55,17 +57,16 @@ def execute(self): local_update_file_name = os.path.expanduser('~/.obdiag/data.tar') local_update_log_file_name = os.path.expanduser('~/.obdiag/data_version.yaml') if file_path and file_path != "": - self.handle_update_offline(file_path) - return + return self.handle_update_offline(file_path) if NetUtils.network_connectivity(remote_server) is False: self.stdio.warn("[update] network connectivity failed. Please check your network connection.") - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="network connectivity failed. Please check your network connection.") NetUtils.download_file(remote_version_file_name, os.path.expanduser(local_version_file_name)) with open(local_version_file_name, 'r') as file: remote_data = yaml.safe_load(file) if remote_data.get("obdiag_version") is None: self.stdio.warn("obdiag_version is None. Do not perform the upgrade process.") - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="obdiag_version is None. Do not perform the upgrade process.") else: self.remote_obdiag_version = remote_data["obdiag_version"].strip() if StringUtils.compare_versions_greater(self.remote_obdiag_version, self.local_obdiag_version): @@ -74,10 +75,15 @@ def execute(self): "remote_obdiag_version>local_obdiag_version. Unable to update dependency files, please upgrade " "obdiag. Do not perform the upgrade process.".format(self.remote_obdiag_version, self.local_obdiag_version) ) - return + return ObdiagResult( + ObdiagResult.SERVER_ERROR_CODE, + error_data="remote_obdiag_version is {0}. local_obdiag_version is {1}. " + "remote_obdiag_version>local_obdiag_version. Unable to update dependency files, please upgrade " + "obdiag. Do not perform the upgrade process.".format(self.remote_obdiag_version, self.local_obdiag_version), + ) if remote_data.get("remote_tar_sha") is None: self.stdio.warn("remote_tar_sha is None. Do not perform the upgrade process.") - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="remote_tar_sha is None. Do not perform the upgrade process.") else: self.remote_tar_sha = remote_data["remote_tar_sha"] # need update? @@ -88,7 +94,7 @@ def execute(self): local_data = yaml.safe_load(file) if local_data.get("remote_tar_sha") is not None and local_data.get("remote_tar_sha") == self.remote_tar_sha: self.stdio.warn("[update] remote_tar_sha as local_tar_sha. No need to update.") - return + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"msg": "remote_tar_sha as local_tar_sha. 
No need to update."}) # get data_update_time if local_data.get("data_update_time") is not None and time.time() - local_data["data_update_time"] < 3600 * 24 * 7: self.stdio.warn("[update] data_update_time No need to update.") @@ -123,9 +129,10 @@ def execute(self): with open(os.path.expanduser("~/.obdiag/data_version.yaml"), 'w') as f: yaml.dump({"data_update_time": int(time.time()), "remote_tar_sha": self.remote_tar_sha}, f) self.stdio.print("[update] Successfully updated. The original data is stored in the *. d folder.") - return + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"msg": "Successfully updated. The original data is stored in the *. d folder."}) except Exception as e: self.stdio.warn('[update] Failed to update. Error message: {0}'.format(e)) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="Failed to update. Error message: {0}".format(e)) def handle_update_offline(self, file): file = os.path.expanduser(file) @@ -133,10 +140,10 @@ def handle_update_offline(self, file): self.local_update_file_sha = FileUtil.calculate_sha256(file) if os.path.exists(file) is False: self.stdio.error('{0} does not exist.'.format(file)) - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="{0} does not exist.".format(file)) if not file.endswith('.tar'): self.stdio.error('{0} is not a tar file.'.format(file)) - return + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="{0} is not a tar file.".format(file)) ## check_old_files if os.path.exists(os.path.expanduser("~/.obdiag/check.d")): shutil.rmtree(os.path.expanduser("~/.obdiag/check.d")) @@ -147,7 +154,6 @@ def handle_update_offline(self, file): shutil.rmtree(os.path.expanduser("~/.obdiag/gather.d")) if os.path.exists(os.path.expanduser("~/.obdiag/gather")): os.rename(os.path.expanduser("~/.obdiag/gather"), os.path.expanduser("~/.obdiag/gather.d")) - ## rca if os.path.exists(os.path.expanduser("~/.obdiag/rca.d")): shutil.rmtree(os.path.expanduser("~/.obdiag/rca.d")) @@ -159,3 +165,4 @@ def handle_update_offline(self, file): with open(os.path.expanduser("~/.obdiag/data_version.yaml"), 'w') as f: yaml.dump({"data_update_time": int(time.time()), "remote_tar_sha": self.remote_tar_sha}, f) self.stdio.print("[update] Successfully updated. The original data is stored in the *. d folder.") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"msg": "Successfully updated. The original data is stored in the *. 
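The update flow above reduces to three gates: version compatibility, content hash, and freshness. A condensed, hypothetical sketch of that decision logic follows; the function name, parameters, and the injected `version_newer` callable are simplifications for illustration, not the actual UpdateHandler API:

```python
import time

def should_update(remote_version, local_version, remote_sha, local_sha, last_update_ts, version_newer):
    # Mirrors the execute() flow above: refuse if the remote data needs a newer obdiag,
    # skip if content is unchanged, skip if it was refreshed within the last 7 days.
    if version_newer(remote_version, local_version):
        return False  # remote data requires a newer obdiag binary
    if remote_sha == local_sha:
        return False  # nothing changed upstream
    if time.time() - last_update_ts < 3600 * 24 * 7:
        return False  # refreshed recently enough
    return True

print(should_update("2.4.0", "2.4.0", "abc", "def", 0, lambda r, l: False))  # True
```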
d folder."}) From f8ab11b2f2ad5d58a7c1e309d8c9da4353ac2afa Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:52:54 +0800 Subject: [PATCH 49/68] config can be passed entirely through parameters (#394) * update version to 2.4.0 * Remove duplicate scripts * The configuration can be passed entirely through parameters * fix * fix --- common/config_helper.py | 4 +- common/tool.py | 87 ++++++++++++++++++++++ config.py | 25 ++++--- core.py | 11 +-- diag_cmd.py | 37 +++++++--- test/common/test_config_parse.py | 122 +++++++++++++++++++++++++++++++ 6 files changed, 254 insertions(+), 32 deletions(-) create mode 100644 test/common/test_config_parse.py diff --git a/common/config_helper.py b/common/config_helper.py index a27e5ccd..e34fb63c 100644 --- a/common/config_helper.py +++ b/common/config_helper.py @@ -104,8 +104,8 @@ def build_configuration(self): global_ssh_port = self.input_with_default("oceanbase host ssh_port", "22") global_home_path = self.input_with_default("oceanbase install home_path", const.OB_INSTALL_DIR_DEFAULT) default_data_dir = os.path.join(global_home_path, "store") - global_data_dir = self.input_with_default("oceanbase data_dir", default_data_dir) - global_redo_dir = self.input_with_default("oceanbase redo_dir", default_data_dir) + global_data_dir = default_data_dir + global_redo_dir = default_data_dir tenant_sys_config = {"user": self.sys_tenant_user, "password": self.sys_tenant_password} global_config = {"ssh_username": global_ssh_username, "ssh_password": global_ssh_password, "ssh_port": global_ssh_port, "ssh_key_file": "", "home_path": global_home_path, "data_dir": global_data_dir, "redo_dir": global_redo_dir} new_config = {"obcluster": {"ob_cluster_name": ob_cluster_name, "db_host": self.db_host, "db_port": self.db_port, "tenant_sys": tenant_sys_config, "servers": {"nodes": nodes_config, "global": global_config}}} diff --git a/common/tool.py b/common/tool.py index 0ab14007..797d19d0 100644 --- a/common/tool.py +++ b/common/tool.py @@ -214,6 +214,93 @@ def passwd_format(passwd): return "'{}'".format(passwd.replace("'", "'\"'\"'")) +class ConfigOptionsParserUtil(object): + def __init__(self): + self.config_dict = {} + self.key_mapping = { + 'db_host': 'obcluster.db_host', + 'db_port': 'obcluster.db_port', + 'tenant_sys.user': 'obcluster.tenant_sys.user', + 'tenant_sys.password': 'obcluster.tenant_sys.password', + 'ssh_username': 'obcluster.servers.global.ssh_username', + 'ssh_password': 'obcluster.servers.global.ssh_password', + 'ssh_port': 'obcluster.servers.global.ssh_port', + 'home_path': 'obcluster.servers.global.home_path', + 'obproxy_home_path': 'obproxy.servers.global.home_path', + } + + def set_nested_value(self, d, keys, value): + """Recursively set the value in a nested dictionary.""" + if len(keys) > 1: + if 'nodes' in keys[0]: + try: + # Handle nodes + parts = keys[0].split('[') + base_key = parts[0] + index = int(parts[1].rstrip(']')) + if base_key not in d: + d[base_key] = [] + while len(d[base_key]) <= index: + d[base_key].append({}) + self.set_nested_value(d[base_key][index], keys[1:], value) + except (IndexError, ValueError) as e: + raise ValueError(f"Invalid node index in key '{keys[0]}'") from e + else: + if keys[0] not in d: + d[keys[0]] = {} + d[keys[0]] = self.set_nested_value(d[keys[0]], keys[1:], value) + else: + d[keys[0]] = value + return d + + def parse_config(self, input_array): + for item in input_array: + try: + key, value = item.split('=', 1) + # Map short keys to full keys if needed 
+ if key in self.key_mapping: + key = self.key_mapping[key] + keys = key.split('.') + self.set_nested_value(self.config_dict, keys, value) + except ValueError: + raise ValueError(f"Invalid input format for item '{item}'") + + self.config_dict = self.add_default_values(self.config_dict) + return self.config_dict + + def add_default_values(self, d): + if isinstance(d, dict): + for k, v in d.items(): + if k == 'login': + if 'password' not in v: + v['password'] = '' + elif k == 'tenant_sys': + if 'password' not in v: + v['password'] = '' + elif k == 'global': + if 'ssh_username' not in v: + v['ssh_username'] = '' + if 'ssh_password' not in v: + v['ssh_password'] = '' + elif k == 'servers': + # Ensure 'nodes' is present and initialized as an empty list + if 'nodes' not in v: + v['nodes'] = [] + if 'global' not in v: + v['global'] = {} + self.add_default_values(v['global']) + for node in v['nodes']: + if isinstance(node, dict): + self.add_default_values(node) + elif isinstance(v, dict): + self.add_default_values(v) + elif isinstance(v, list): + for node in v: + if isinstance(node, dict): + self.add_default_values(node) + return d + + class DirectoryUtil(object): @staticmethod diff --git a/config.py b/config.py index bd32cbbe..6cbc1a92 100644 --- a/config.py +++ b/config.py @@ -17,7 +17,7 @@ from __future__ import absolute_import, division, print_function import os -from common.tool import DirectoryUtil +from common.tool import ConfigOptionsParserUtil, DirectoryUtil from stdio import SafeStdio import oyaml as yaml import pathlib @@ -148,17 +148,20 @@ def load_config_with_defaults(self, defaults_dict): class ConfigManager(Manager): - def __init__(self, config_file=None, stdio=None): + def __init__(self, config_file=None, stdio=None, config_env_list=[]): default_config_path = os.path.join(os.path.expanduser("~"), ".obdiag", "config.yml") - - if config_file is None or not os.path.exists(config_file): - config_file = default_config_path - pathlib.Path(os.path.dirname(default_config_path)).mkdir(parents=True, exist_ok=True) - with open(default_config_path, 'w') as f: - f.write(DEFAULT_CONFIG_DATA) - super(ConfigManager, self).__init__(config_file, stdio) - self.config_file = config_file - self.config_data = self.load_config() + if config_env_list is None or len(config_env_list) == 0: + if config_file is None or not os.path.exists(config_file): + config_file = default_config_path + pathlib.Path(os.path.dirname(default_config_path)).mkdir(parents=True, exist_ok=True) + with open(default_config_path, 'w') as f: + f.write(DEFAULT_CONFIG_DATA) + super(ConfigManager, self).__init__(config_file, stdio) + self.config_file = config_file + self.config_data = self.load_config() + else: + parser = ConfigOptionsParserUtil() + self.config_data = parser.parse_config(config_env_list) def _safe_get(self, dictionary, *keys, default=None): """Safe way to retrieve nested values from dictionaries""" diff --git a/core.py b/core.py index 95cb1206..675a7720 100644 --- a/core.py +++ b/core.py @@ -57,13 +57,12 @@ from colorama import Fore, Style from common.config_helper import ConfigHelper -from common.tool import Util from common.tool import TimeUtils class ObdiagHome(object): - def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config.yml'), inner_config_change_map=None): + def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config.yml'), inner_config_change_map=None, custom_config_env_list=None): self._optimize_manager = None self.stdio = None self._stdio_func = None @@ -80,13 
+79,7 @@ def __init__(self, stdio=None, config_path=os.path.expanduser('~/.obdiag/config. if self.inner_config_manager.config.get("obdiag") is not None and self.inner_config_manager.config.get("obdiag").get("logger") is not None and self.inner_config_manager.config.get("obdiag").get("logger").get("silent") is not None: stdio.set_silent(self.inner_config_manager.config.get("obdiag").get("logger").get("silent")) self.set_stdio(stdio) - if config_path: - if os.path.exists(os.path.abspath(config_path)): - config_path = config_path - else: - stdio.error('The option you provided with -c: {0} is not exist.'.format(config_path)) - return - self.config_manager = ConfigManager(config_path, stdio) + self.config_manager = ConfigManager(config_path, stdio, custom_config_env_list) if ( self.inner_config_manager.config.get("obdiag") is not None and self.inner_config_manager.config.get("obdiag").get("basic") is not None diff --git a/diag_cmd.py b/diag_cmd.py index 95d5dc14..79c0b0cc 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -261,14 +261,8 @@ def do_command(self): ROOT_IO.verbose('cmd: %s' % self.prev_cmd) ROOT_IO.verbose('opts: %s' % self.opts) config_path = os.path.expanduser('~/.obdiag/config.yml') - custom_config = Util.get_option(self.opts, 'c') - if custom_config: - if os.path.exists(os.path.abspath(custom_config)): - config_path = custom_config - else: - ROOT_IO.error('The option you provided with -c: {0} is a non-existent configuration file path.'.format(custom_config)) - return - obdiag = ObdiagHome(stdio=ROOT_IO, config_path=custom_config, inner_config_change_map=self.inner_config_change_map) + custom_config_env_list = Util.get_option(self.opts, 'config') + obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path, inner_config_change_map=self.inner_config_change_map, custom_config_env_list=custom_config_env_list) obdiag.set_options(self.opts) obdiag.set_cmds(self.cmds) ret = self._do_command(obdiag) @@ -417,6 +411,7 @@ def __init__(self): self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherAllCommand, self).init(cmd, args) @@ -439,6 +434,7 @@ def __init__(self): self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherLogCommand, self).init(cmd, args) @@ -455,6 +451,7 @@ def __init__(self): super(ObdiagGatherParameterCommand, self).__init__('parameter', 'Gather oceanbase parameters from oceanbase database') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', 
help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherParameterCommand, self).init(cmd, args) @@ -471,6 +468,7 @@ def __init__(self): super(ObdiagGatherVariableCommand, self).__init__('variable', 'Gather oceanbase variables from oceanbase database') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherVariableCommand, self).init(cmd, args) @@ -487,6 +485,7 @@ def __init__(self): super(ObdiagGatherSysStatCommand, self).__init__('sysstat', 'Gather Host information') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherSysStatCommand, self).init(cmd, args) @@ -504,6 +503,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherStackCommand, self).init(cmd, args) @@ -522,6 +522,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('--scope', type='string', help="perf type constrains, choices=[sample, flame, pstack, all]", default='all') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherPerfCommand, self).init(cmd, args) @@ -542,6 +543,7 @@ def __init__(self): self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherSlogCommand, self).init(cmd, args) @@ -562,6 +564,7 @@ def __init__(self): self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by 
default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherClogCommand, self).init(cmd, args) @@ -583,6 +586,7 @@ def __init__(self): self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . example: 1h.", default='30m') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherAwrCommand, self).init(cmd, args) @@ -601,6 +605,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('--env', type='string', help='''env, eg: "{db_connect='-h127.0.0.1 -P2881 -utest@test -p****** -Dtest'}"''') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherPlanMonitorCommand, self).init(cmd, args) @@ -623,6 +628,7 @@ def __init__(self): self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherObproxyLogCommand, self).init(cmd, args) @@ -657,6 +663,7 @@ def __init__(self): self.parser.add_option('--env', type='string', help='env, eg: "{env1=xxx, env2=xxx}"') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherSceneRunCommand, self).init(cmd, args) @@ -678,8 +685,8 @@ def __init__(self): self.parser.add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--to', type='string', help="specify the end of the time range. 
format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') - self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherAshReportCommand, self).init(cmd, args) @@ -699,6 +706,7 @@ def __init__(self): self.parser.add_option('--password', type='string', help="The password for the database user. If not specified, an attempt will be made to connect without a password.", default='') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./obdiag_gather_report') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagGatherTableDumpHandler, self).init(cmd, args) @@ -719,8 +727,9 @@ def __init__(self): self.parser.add_option('--log_level', type='string', help="oceanbase logs greater than or equal to this level will be analyze, choices=[DEBUG, TRACE, INFO, WDIAG, WARN, EDIAG, ERROR]") self.parser.add_option('--files', action="append", type='string', help="specify files") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') - self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . 
example: 1h.", default='30m') + self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeLogCommand, self).init(cmd, args) @@ -746,6 +755,7 @@ def __init__(self): self.parser.add_option('--output', type='string', help="Print the result to the maximum output line on the screen", default=60) self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeFltTraceCommand, self).init(cmd, args) @@ -762,6 +772,7 @@ def __init__(self): self.parser.add_option('--file', type='string', help="specify initialization parameter file") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeParameterDiffCommand, self).init(cmd, args) @@ -778,6 +789,7 @@ def __init__(self): self.parser.add_option('--file', type='string', help="specify initialization parameter file") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeParameterDefaultCommand, self).init(cmd, args) @@ -801,6 +813,7 @@ def __init__(self): self.parser.add_option('--file', type='string', help="specify initialization parameter file") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeVariableDiffCommand, self).init(cmd, args) @@ -835,6 +848,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/') self.parser.add_option('--elapsed_time', type='string', help='The minimum threshold for filtering execution time, measured in microseconds.', default=100000) self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeSQLCommand, self).init(cmd, args) @@ -858,6 +872,7 @@ def __init__(self): self.parser.add_option('--output', type='string', help="The format of 
the output results, choices=[json, html]", default='html') self.parser.add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeSQLReviewCommand, self).init(cmd, args) @@ -877,6 +892,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store check result, current dir by default.', default='./check_report/') self.parser.add_option('--report_type', type='string', help='The type of the check report, support "table", "json", "xml", "yaml". default table', default='table') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagCheckCommand, self).init(cmd, args) @@ -897,6 +913,7 @@ def __init__(self): self.parser.add_option('--store_dir', type='string', help='the dir to store rca result, current dir by default.', default='./rca/') self.parser.add_option('--input_parameters', action='callback', type='string', callback=self._input_parameters_scene, help='input parameters of scene') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') self.scene_input_param_map = {} def _input_parameters_scene(self, option, opt_str, value, parser): diff --git a/test/common/test_config_parse.py b/test/common/test_config_parse.py new file mode 100644 index 00000000..2b47900e --- /dev/null +++ b/test/common/test_config_parse.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2024/8/6 +@file: test_config_parse.py +@desc: +""" + +import unittest +from common.tool import ConfigOptionsParserUtil + + +class TestConfigParser(unittest.TestCase): + + def setUp(self): + self.parser = ConfigOptionsParserUtil() + + def test_valid_input_case1(self): + input_array = [ + 'ocp.login.url=http://xx.xx.xx.xx:xx', + 'ocp.login.user=admin', + 'obcluster.ob_cluster_name=test', + 'obcluster.db_host=192.168.1.1', + 'obcluster.db_port=2881', + 'obcluster.tenant_sys.user=root@sys', + 'obcluster.servers.nodes[0].ip=192.168.1.1', + 'obcluster.servers.nodes[1].ip=192.168.1.2', + 'obcluster.servers.nodes[2].ip=192.168.1.3', + 'obcluster.servers.global.ssh_username=test', + 'obcluster.servers.global.ssh_password=test', + 'obcluster.servers.global.home_path=/root/observer', + 'obproxy.obproxy_cluster_name=obproxy', + 'obproxy.servers.nodes[0].ip=192.168.1.4', + 'obproxy.servers.nodes[1].ip=192.168.1.5', + 'obproxy.servers.nodes[2].ip=192.168.1.6', + 'obproxy.servers.global.ssh_username=test', + 'obproxy.servers.global.ssh_password=test', + 'obproxy.servers.global.home_path=/root/obproxy', + ] + + expected_output = { + 'ocp': {'login': {'url': 'http://xx.xx.xx.xx:xx', 'user': 'admin', 'password': ''}}, + 'obcluster': { + 'ob_cluster_name': 'test', + 'db_host': '192.168.1.1', + 'db_port': '2881', + 'tenant_sys': {'user': 'root@sys', 'password': ''}, + 'servers': {'global': {'ssh_username': 'test', 'ssh_password': 'test', 'home_path': '/root/observer'}, 'nodes': [{'ip': '192.168.1.1'}, {'ip': '192.168.1.2'}, {'ip': '192.168.1.3'}]}, + }, + 'obproxy': {'obproxy_cluster_name': 'obproxy', 'servers': {'global': {'ssh_username': 'test', 'ssh_password': 'test', 'home_path': '/root/obproxy'}, 'nodes': [{'ip': '192.168.1.4'}, {'ip': '192.168.1.5'}, {'ip': '192.168.1.6'}]}}, + } + + parsed_config = self.parser.parse_config(input_array) + self.assertEqual(parsed_config, expected_output) + + def test_valid_input_case2(self): + input_array = [ + 'ocp.login.url=http://xx.xx.xx.xx:xx', + 'ocp.login.user=admin', + 'obcluster.ob_cluster_name=test', + 'obcluster.db_host=192.168.1.1', + 'obcluster.db_port=2881', + 'obcluster.tenant_sys.user=root@sys', + 'obcluster.servers.nodes[0].ip=192.168.1.1', + 'obcluster.servers.nodes[0].ssh_username=test2', + 'obcluster.servers.nodes[0].ssh_password=test2', + 'obcluster.servers.nodes[0].home_path=/root/test/observer', + 'obcluster.servers.nodes[1].ip=192.168.1.2', + 'obcluster.servers.nodes[2].ip=192.168.1.3', + 'obcluster.servers.global.ssh_username=test', + 'obcluster.servers.global.ssh_password=test', + 'obcluster.servers.global.home_path=/root/observer', + 'obproxy.obproxy_cluster_name=obproxy', + 'obproxy.servers.nodes[0].ip=192.168.1.4', + 'obproxy.servers.nodes[1].ip=192.168.1.5', + 'obproxy.servers.nodes[2].ip=192.168.1.6', + 'obproxy.servers.global.ssh_username=test', + 'obproxy.servers.global.ssh_password=test', + 'obproxy.servers.global.home_path=/root/obproxy', + ] + + expected_output = { + 'ocp': {'login': {'url': 'http://xx.xx.xx.xx:xx', 'user': 'admin', 'password': ''}}, + 'obcluster': { + 'ob_cluster_name': 'test', + 'db_host': '192.168.1.1', + 'db_port': '2881', + 'tenant_sys': {'user': 'root@sys', 'password': ''}, + 'servers': { + 'global': {'ssh_username': 'test', 'ssh_password': 'test', 'home_path': '/root/observer'}, + 'nodes': [{'home_path': '/root/test/observer', 'ip': '192.168.1.1', 'ssh_username': 'test2', 'ssh_password': 'test2'}, {'ip': '192.168.1.2'}, {'ip': '192.168.1.3'}], + }, + }, + 'obproxy': {'obproxy_cluster_name': 
'obproxy', 'servers': {'global': {'ssh_username': 'test', 'ssh_password': 'test', 'home_path': '/root/obproxy'}, 'nodes': [{'ip': '192.168.1.4'}, {'ip': '192.168.1.5'}, {'ip': '192.168.1.6'}]}}, + } + + parsed_config = self.parser.parse_config(input_array) + self.assertEqual(parsed_config, expected_output) + + def test_invalid_format(self): + input_array = ['ocp.login.url=http://xx.xx.xx.xx:xx', 'invalid_format_string'] + with self.assertRaises(ValueError): + self.parser.parse_config(input_array) + + def test_invalid_node_index(self): + input_array = ['ocp.login.url=http://xx.xx.xx.xx:xx', 'obcluster.servers.nodes[not_a_number].ip=192.168.1.1'] + with self.assertRaises(ValueError): + self.parser.parse_config(input_array) + + +if __name__ == '__main__': + unittest.main() From e31eb0ea5e1273f91458d5201f11cfa51c2411ca Mon Sep 17 00:00:00 2001 From: xiaodong-ji Date: Mon, 26 Aug 2024 15:21:46 +0800 Subject: [PATCH 50/68] add analyze index space feature (#393) * add analyze index space * add report table * support silent report * add display unit --- core.py | 6 + diag_cmd.py | 18 +++ handler/analyzer/analyze_index_space.py | 183 ++++++++++++++++++++++++ 3 files changed, 207 insertions(+) create mode 100644 handler/analyzer/analyze_index_space.py diff --git a/core.py b/core.py index 675a7720..f96529da 100644 --- a/core.py +++ b/core.py @@ -36,6 +36,7 @@ from handler.analyzer.analyze_sql_review import AnalyzeSQLReviewHandler from handler.analyzer.analyze_parameter import AnalyzeParameterHandler from handler.analyzer.analyze_variable import AnalyzeVariableHandler +from handler.analyzer.analyze_index_space import AnalyzeIndexSpaceHandler from handler.checker.check_handler import CheckHandler from handler.checker.check_list import CheckListHandler from handler.gather.gather_log import GatherLogHandler @@ -311,6 +312,11 @@ def analyze_fuction(self, function_type, opt): self.set_context(function_type, 'analyze', config) handler = AnalyzeSQLReviewHandler(self.context) handler.handle() + elif function_type == 'analyze_index_space': + self.set_context(function_type, 'analyze', config) + handler = AnalyzeIndexSpaceHandler(self.context) + handler.handle() + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=handler.execute()) else: self._call_stdio('error', 'Not support analyze function: {0}'.format(function_type)) return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='Not support analyze function: {0}'.format(function_type)) diff --git a/diag_cmd.py b/diag_cmd.py index 79c0b0cc..2e0736bf 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -830,6 +830,23 @@ def __init__(self): self.register_command(ObdiagAnalyzeVariableDiffCommand()) +class ObdiagAnalyzeIndexSpaceCommand(ObdiagOriginCommand): + def __init__(self): + super(ObdiagAnalyzeIndexSpaceCommand, self).__init__('index_space', 'Analyze the space of existing or non-existent index and estimate it through the columns included in the index') + self.parser.add_option('--tenant_name', type='string', help="tenant name") + self.parser.add_option('--table_name', type='string', help="table name") + self.parser.add_option('--index_name', type='string', help="specify the index name if an index already exists in the table") + self.parser.add_option('--column_names', type='string', help="specify the column names of index that have not been created yet;eg:--column_names=c1,c2,c3") + + def init(self, cmd, args): + super(ObdiagAnalyzeIndexSpaceCommand, self).init(cmd, args) + self.parser.set_usage('%s [options]' % self.prev_cmd) + return self + + def 
_do_command(self, obdiag): + return obdiag.analyze_fuction('analyze_index_space', self.opts) + + class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand): def __init__(self): @@ -1067,6 +1084,7 @@ def __init__(self): self.register_command(ObdiagAnalyzeFltTraceCommand()) self.register_command(ObdiagAnalyzeParameterCommand()) self.register_command(ObdiagAnalyzeVariableCommand()) + self.register_command(ObdiagAnalyzeIndexSpaceCommand()) # self.register_command(ObdiagAnalyzeSQLCommand()) # self.register_command(ObdiagAnalyzeSQLReviewCommand()) diff --git a/handler/analyzer/analyze_index_space.py b/handler/analyzer/analyze_index_space.py new file mode 100644 index 00000000..b638e5a8 --- /dev/null +++ b/handler/analyzer/analyze_index_space.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/8/19 +@file: analyze_index_space.py +@desc: +""" + + +import sys +from prettytable import PrettyTable +from common.tool import StringUtils, Util +from common.ob_connector import OBConnector +from common.command import get_observer_version + + +def translate_byte(B): + if B < 0: + B = -B + return '-' + translate_byte(B) + if B == 0: + return '0B' + units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'] + k = 1024 + i = 0 + while B >= k and i < len(units) - 1: + B /= k + i += 1 + return f"{B:.2f} {units[i]}" + + +class AnalyzeIndexSpaceHandler(object): + def __init__(self, context): + self.context = context + self.stdio = context.stdio + self.ob_version = get_observer_version(self.context) + self.sys_connector = None + self.tenant_id = None + self.table_id = None + self.index_id = None + self.column_names = [] + self.estimated_table_data = None + self.result_map_list = [] + + def init_option(self): + options = self.context.options + ob_cluster = self.context.cluster_config + self.stdio.verbose('cluster config: {0}'.format(StringUtils.mask_passwords(ob_cluster))) + self.ob_cluster = ob_cluster + self.sys_connector = OBConnector(ip=ob_cluster.get("db_host"), port=ob_cluster.get("db_port"), username=ob_cluster.get("tenant_sys").get("user"), password=ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100) + tenant_name = Util.get_option(options, 'tenant_name') + table_name = Util.get_option(options, 'table_name') + index_name = Util.get_option(options, 'index_name') + column_names = Util.get_option(options, 'column_names') + # get tenant id + tenant_data = self.sys_connector.execute_sql("select tenant_id from oceanbase.__all_tenant where tenant_name = '{0}';".format(tenant_name)) + if len(tenant_data) == 0: + raise Exception("can not find tenant id by tenant name: {0}. Please check the tenant name.".format(tenant_name)) + self.tenant_id = tenant_data[0][0] + if self.tenant_id is None: + raise Exception("can not find tenant id by tenant name: {0}. 
Please check the tenant name.".format(tenant_name)) + # get table id + table_id_data = self.sys_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name = '{0}' and tenant_id = '{1}';".format(table_name, self.tenant_id)) + if len(table_id_data) == 0: + raise Exception("can not find table id by table name: {0}. Please check the table name.".format(table_name)) + self.table_id = table_id_data[0][0] + if self.table_id is None: + raise Exception("can not find table id by table name: {0}. Please check the table name.".format(table_name)) + # get index id + if index_name is not None: + index_id_data = self.sys_connector.execute_sql("select table_id from oceanbase.__all_virtual_table where table_name like '%{0}%' and data_table_id = '{1}' and tenant_id = '{2}';".format(index_name, self.table_id, self.tenant_id)) + if len(index_id_data) == 0: + raise Exception("can not find index id by index name: {0}. Please check the index name.".format(index_name)) + self.index_id = index_id_data[0][0] + if self.index_id is None: + raise Exception("can not find index id by index name: {0}. Please check the index name.".format(index_name)) + # get column names + if column_names is not None: + self.column_names = column_names.split(',') + if len(self.column_names) == 0: + raise Exception("--column_names parameter format is incorrect: {0}.".format(column_names)) + return True + + def handle(self): + try: + if not self.init_option(): + self.stdio.error('init option failed') + return False + # evaluate the space size of the table where the index is located + self.stdio.start_loading('start query estimated_table_data_size, please wait some minutes...') + sql = "select svr_ip, svr_port, sum(original_size) as estimated_table_size from oceanbase.__all_virtual_tablet_sstable_macro_info where tablet_id in (select tablet_id from oceanbase.__all_virtual_tablet_to_table_history where table_id = {0}) and (svr_ip, svr_port) in (select svr_ip, svr_port from oceanbase.__all_virtual_ls_meta_table where role = 1) group by svr_ip, svr_port;".format( + self.table_id + ) + self.stdio.verbose("execute_sql is {0}".format(sql)) + self.estimated_table_data = self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall() + self.stdio.stop_loading('succeed') + # get the sum of all column lengths + sql = "select table_id, sum(data_length) as all_columns_length from oceanbase.__all_virtual_column_history where tenant_id = '{0}' and table_id = '{1}';".format(self.tenant_id, self.table_id) + self.stdio.verbose("execute_sql is {0}".format(sql)) + self.main_table_sum_of_data_length = int(self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall()[0]["all_columns_length"]) + # get the sum of column lengths included in the index + if self.index_id is not None: + sql = "select table_id, sum(data_length) as index_columns_length from oceanbase.__all_virtual_column_history where tenant_id = '{0}' and table_id = '{1}';".format(self.tenant_id, self.index_id) + self.stdio.verbose("execute_sql is {0}".format(sql)) + self.index_table_sum_of_data_length = int(self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall()[0]["index_columns_length"]) + elif len(self.column_names) != 0: + sql = "select table_id, sum(data_length) as columns_length from oceanbase.__all_virtual_column_history where tenant_id = '{0}' and table_id = '{1}' and column_name in ('{2}');".format( + self.tenant_id, self.table_id, "','".join(self.column_names) + ) + self.stdio.verbose("execute_sql is {0}".format(sql)) + 
self.index_table_sum_of_data_length = int(self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall()[0]["columns_length"]) + else: + raise Exception("please specify an index or column.") + + # estimate the final space size + estimated_index_data = [] + for node_table_estimated_size in self.estimated_table_data: + node_estimated_index_data = {} + node_estimated_index_data["svr_ip"] = node_table_estimated_size["svr_ip"] + node_estimated_index_data["svr_port"] = node_table_estimated_size["svr_port"] + estimated_index_size = int(self.index_table_sum_of_data_length / self.main_table_sum_of_data_length * int(node_table_estimated_size["estimated_table_size"])) + if self.ob_version == "4.2.3.0" or StringUtils.compare_versions_greater(self.ob_version, "4.2.3.0"): + self.stdio.verbose("magnification is 1.5") + target_server_estimated_size = int(estimated_index_size * 15 / 10) + else: + self.stdio.verbose("magnification is 5.5") + target_server_estimated_size = int(estimated_index_size * 55 / 10) + node_estimated_index_data["estimated_index_size"] = target_server_estimated_size + estimated_index_data.append(node_estimated_index_data) + for node_estimated_index_data in estimated_index_data: + target_server_ip = node_estimated_index_data["svr_ip"] + target_server_port = node_estimated_index_data["svr_port"] + target_server_estimated_index_size = int(node_estimated_index_data["estimated_index_size"]) + # get target_server_total_size and target_server_used_size + target_server_data = self.sys_connector.execute_sql_return_cursor_dictionary( "select total_size, used_size from oceanbase.__all_virtual_disk_stat where svr_ip = '{0}' and svr_port = {1};".format(target_server_ip, target_server_port) ).fetchall() + target_server_total_size = int(target_server_data[0]["total_size"]) + target_server_used_size = int(target_server_data[0]["used_size"]) + # get data_disk_usage_limit_percentage + sql = "SELECT VALUE FROM oceanbase.GV$OB_PARAMETERS WHERE SVR_IP='{0}' and SVR_PORT='{1}' and NAME LIKE \"data_disk_usage_limit_percentage\"".format(target_server_ip, target_server_port) + self.stdio.verbose("execute_sql is {0}".format(sql)) + data_disk_usage_limit_percentage = int(self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall()[0]["VALUE"]) + # data_disk_usage_limit_percentage is a cluster-level configuration item + available_disk_space = int(target_server_total_size / 100 * data_disk_usage_limit_percentage - target_server_used_size) + node_result_map = {} + node_result_map["ip"] = target_server_ip + node_result_map["port"] = target_server_port + node_result_map["estimated_index_space"] = translate_byte(target_server_estimated_index_size) + node_result_map["available_disk_space"] = translate_byte(available_disk_space) + self.result_map_list.append(node_result_map) + self.export_report_table() + except Exception as e: + self.stdio.error("analyze index space error: {0}".format(e)) + sys.exit() + finally: + self.stdio.verbose("end analyze index space") + + def execute(self): + result_map = {} + result_map["result"] = self.result_map_list + return result_map + + def export_report_table(self): + try: + report_index_space_tb = PrettyTable(["ip", "port", "estimated_index_space", "available_disk_space"]) + report_index_space_tb.align["task_report"] = "l" + report_index_space_tb.title = "estimated-index-space-report" + for result in self.result_map_list: + report_index_space_tb.add_row([result["ip"], result["port"], result["estimated_index_space"], result["available_disk_space"]]) + self.stdio.print(report_index_space_tb) + except Exception as e: + raise Exception("export report {0}".format(e))
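The estimator's core arithmetic is simple enough to state on its own: index size is roughly (index column bytes / table column bytes) x on-disk table size x a version-dependent magnification (1.5 for observer 4.2.3.0 and later, else 5.5). The standalone function below restates the formula from the handler above; it is an illustrative sketch, not the shipped implementation:

```python
def estimate_index_size(index_cols_bytes, table_cols_bytes, table_bytes, version_ge_4230):
    # Ratio of indexed column widths to total row width, scaled by on-disk table size.
    base = index_cols_bytes / table_cols_bytes * table_bytes
    magnification = 1.5 if version_ge_4230 else 5.5
    return int(base * magnification)

# Example: 64 of 256 bytes per row indexed, 10 GiB table, observer >= 4.2.3.0
print(estimate_index_size(64, 256, 10 * 1024**3, True))  # ~3.75 GiB, in bytes
```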
self.stdio.print(report_index_space_tb) + except Exception as e: + raise Exception("export report {0}".format(e)) From 926f5e9790e54b55738bd0acaf4cef4986b8e3dd Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:50:38 +0800 Subject: [PATCH 51/68] analyze index space add --config option (#398) * update version to 2.4.0 * Remove duplicate scripts * The configuration can be passed entirely through parameters * fix * fix * analyze index space add --config option --- diag_cmd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/diag_cmd.py b/diag_cmd.py index 2e0736bf..2564d075 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -837,6 +837,8 @@ def __init__(self): self.parser.add_option('--table_name', type='string', help="table name") self.parser.add_option('--index_name', type='string', help="specify the index name if an index already exists in the table") self.parser.add_option('--column_names', type='string', help="specify the column names of index that have not been created yet;eg:--column_names=c1,c2,c3") + self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) + self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') def init(self, cmd, args): super(ObdiagAnalyzeIndexSpaceCommand, self).init(cmd, args) From fe589a727c0f66f6c9eaf44090a343e1287a2749 Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:52:23 +0800 Subject: [PATCH 52/68] fix avx err_msg (#399) --- handler/checker/tasks/observer/system/instruction_set_avx2.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handler/checker/tasks/observer/system/instruction_set_avx2.yaml b/handler/checker/tasks/observer/system/instruction_set_avx2.yaml index a17a1e00..acd6d842 100644 --- a/handler/checker/tasks/observer/system/instruction_set_avx2.yaml +++ b/handler/checker/tasks/observer/system/instruction_set_avx2.yaml @@ -6,6 +6,6 @@ task: result: set_value: cpu_flags verify: " [[ $cpu_flags == *avx2* ]] " - err_msg: 'clock_source: #{clock_source}. recommended: tsc. Uneven CPU utilization during pressure testing resulted in low TPS during pressure testing' + err_msg: 'observer need cpu support avx2. If the cpu is not support avx2, observer will be crash.' 
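The estimation added in the patch above reduces to a proportion: projected index size = (index column bytes / table column bytes) x current table size, inflated by a compaction-headroom magnification of 1.5 on OceanBase 4.2.3.0 and later (5.5 before that), then compared per server against the headroom total_size * data_disk_usage_limit_percentage / 100 - used_size. A minimal standalone sketch of that arithmetic follows; it assumes the byte counts have already been read from the virtual tables, and every name in it is illustrative rather than part of the obdiag codebase.

    # Minimal sketch of the index-space estimate above; names are illustrative only.
    def estimate_index_space(index_columns_length, table_columns_length, table_size_bytes, ob_version):
        # Scale the table's on-disk size by the share of column bytes the index covers.
        base = index_columns_length / table_columns_length * table_size_bytes
        # 4.2.3.0 and later apply a 1.5x compaction magnification; older versions use 5.5x.
        factor = 1.5 if tuple(map(int, ob_version.split("."))) >= (4, 2, 3, 0) else 5.5
        return int(base * factor)

    def available_disk_space(total_size, used_size, data_disk_usage_limit_percentage):
        # Bytes the server can still allocate before hitting its disk-usage limit.
        return int(total_size / 100 * data_disk_usage_limit_percentage - used_size)

    # Example: index covers 200 of 1000 column bytes on a 10 GiB table, observer 4.2.3.0:
    # 0.2 * 10 GiB * 1.5 = 3 GiB projected index space.
    print(estimate_index_space(200, 1000, 10 * 1024**3, "4.2.3.0"))

The PrettyTable report produced by export_report_table() simply prints the projected size and the available disk space side by side, so the operator can compare them per server.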
From 52e9cfbeb7f370426950b4dd56ec7d655ee1f7c7 Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Mon, 26 Aug 2024 16:54:44 +0800
Subject: [PATCH 53/68] update init_obdiag_cmd.sh for analyze index_space (#400)

* update version to 2.4.0
* Remove duplicate scripts
* The configuration can be passed entirely through parameters
* fix
* fix
* analyze index space add --config option
* fix
---
 init_obdiag_cmd.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh
index dac049c9..9a9a743b 100644
--- a/init_obdiag_cmd.sh
+++ b/init_obdiag_cmd.sh
@@ -19,7 +19,7 @@ _obdiag_completion() {
             ;;
         analyze)
             if [ "$COMP_CWORD" -eq 2 ]; then
-                type_list="log flt_trace parameter variable"
+                type_list="log flt_trace parameter variable index_space"
             elif [ "${COMP_WORDS[2]}" = "parameter" ] && [ "$COMP_CWORD" -eq 3 ]; then
                 type_list="diff default"
             elif [ "${COMP_WORDS[2]}" = "variable" ] && [ "$COMP_CWORD" -eq 3 ]; then

From a188bd5363737b7dd67ac4a475bccf7c6ebb1007 Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:24:14 +0800
Subject: [PATCH 54/68] Support querying nodes through the tenant connection string and completing the node config (#401)

* update version to 2.4.0
* Remove duplicate scripts
* The configuration can be passed entirely through parameters
* fix
* fix
* analyze index space add --config option
* fix
* Support querying nodes through the tenant connection string and completing the node config
* ifx
---
 README-CN.md | 12 ++++++------
 README.md | 12 ++++++------
 common/tool.py | 22 ++++++++++++++++++++++
 config.py | 8 ++++++++
 core.py | 41 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/README-CN.md b/README-CN.md
index e9c8c9c3..902c56aa 100644
--- a/README-CN.md
+++ b/README-CN.md
@@ -78,12 +78,12 @@ obdiag 期望构建一个开放的社区,我们欢迎任何形式的贡献,
 |2.1.0|2024.04| 2024.05.13|<br>
  • 根因分析场景扩展
  • 新增 ash 报告 采集
| |2.2.0|2024.05| 2024.06.14 |
  • 根因分析场景扩展
  • 巡检场景扩展
| |2.3.0|2024.06| 2024.07.24 |
  • 根因分析场景扩展
  • 新增基础采集功能: tabledump
  • 新增参数/变量比对分析功能
  • 执行底座改造,支持 k8s 部署的 OceanBase 集群诊断
| -|2.4.0|2024.07| - |
  • 易用性改造
  • 支持 SQL 诊断
| -|2.5.0|2024.08| - |
  • 根因分析场景扩展
  • 支持 OMS 诊断
| -|3.0.0|2024.09| - |
  • 根因分析场景扩展
  • 服务化改造
| -|3.1.0|2024.10| - |
  • 根因分析场景扩展
  • 支持巡检报告比对
| -|3.2.0|2024.11| - |
  • 根因分析场景扩展
  • SQL 诊断二期,支持SQL问题的根因分析
| -|3.3.0|2024.12| - |
  • AI 化探索
| +|2.4.0|2024.07| - |
  • 易用性改造
  • 索引空间分析
| +|2.5.0|2024.09| - |
  • 场景化一键展示集群诊断信息功能
  • 队列积压分析
| +|3.0.0|2024.10| - |
  • SQL 诊断
  • 支持 OMS 诊断
| +|3.1.0|2024.11| - |
  • 根因分析场景扩展
  • 支持巡检报告比对
| +|3.2.0|2024.12| - |
  • 根因分析场景扩展
  • SQL 诊断二期,支持SQL问题的根因分析
| +|4.0.0|2025.01| - |
  • AI 化探索
| # 支持 diff --git a/README.md b/README.md index 8dd16418..38d7a9e1 100644 --- a/README.md +++ b/README.md @@ -82,12 +82,12 @@ obdiag envisions an open community. We welcome your contributions in any form: |2.1.0|2024.04| 2024.05.13|
  • Root Cause Analysis Scenario Expansion
  • Gather ash report
| |2.2.0|2024.05| 2024.06.14 |
  • Root Cause Analysis Scenario Expansion
  • Check Scenario Expansion
| |2.3.0|2024.06| 2024.07.24 |
  • Root Cause Analysis Scenario Expansion
  • Added basic gather feature: tabledump
  • Added parameter/variable gather and analyze feature
  • Execute infrastructure modifications to support diagnostics for OceanBase clusters deployed on Kubernetes (k8s)
| -|2.4.0|2024.07| - |
  • usability improvement
  • Support SQL Diagnosis
| -|2.5.0|2024.08| - |
  • Root Cause Analysis Scenario Expansion
  • Support OMS Diagnosis
| -|3.0.0|2024.09| - |
  • Root Cause Analysis Scenario Expansion
  • Service-ification Transformation
| -|3.1.0|2024.10| - |
  • Root Cause Analysis Scenario Expansion
  • Supporting Comparative Functionality for Patrol Inspection Reports
| -|3.2.0|2024.11| - |
  • Root Cause Analysis Scenario Expansion
  • SQL Diagnosis Phase II, Supporting Root Cause Analysis for SQL problems
| -|3.3.0|2024.12| - |
  • AI for obdiag
| +|2.4.0|2024.07| - |
  • usability improvement
  • Index Space Size Analysis
| +|2.5.0|2024.08| - |
  • Cluster Diagnosis Information Display
  • Queue Analysis
| +|3.0.0|2024.10| - |
  • Root Cause Analysis Scenario Expansion
  • SQL Diagnosis
| +|3.1.0|2024.11| - |
  • Root Cause Analysis Scenario Expansion
  • Supporting Comparative Functionality for Patrol Inspection Reports
| +|3.2.0|2024.12| - |
  • Root Cause Analysis Scenario Expansion
  • SQL Diagnosis Phase II, Supporting Root Cause Analysis for SQL problems
| +|4.0.0|2025.01| - |
  • AI for obdiag
| # Support diff --git a/common/tool.py b/common/tool.py index 797d19d0..283f5679 100644 --- a/common/tool.py +++ b/common/tool.py @@ -1502,6 +1502,28 @@ def get_nodes_list(context, nodes, stdio=None): return new_nodes return None + @staticmethod + def check_none_values(config, stdio): + """ + Check if any values in the given configuration dictionary are None. + If any value is None, print the specific information and return False. + If all values are not None, return True. + + :param config: Dictionary containing configuration items + :return: True if no None values are found, otherwise False + """ + # First, check the top-level key-value pairs + for key, value in config.items(): + if value is None: + stdio.error("The value of '{0}' is None.".format(key)) + return False + + # If the value is a dictionary, recursively check the sub-dictionary + if isinstance(value, dict): + if not Util.check_none_values(value, stdio): + return False + return True + class SQLUtil(object): re_trace = re.compile(r'''\/\*.*trace_id((?!\/\*).)*rpc_id.*\*\/''', re.VERBOSE) diff --git a/config.py b/config.py index 6cbc1a92..60a650f1 100644 --- a/config.py +++ b/config.py @@ -163,6 +163,14 @@ def __init__(self, config_file=None, stdio=None, config_env_list=[]): parser = ConfigOptionsParserUtil() self.config_data = parser.parse_config(config_env_list) + def update_config_data(self, new_config_data, save_to_file=False): + if not isinstance(new_config_data, dict): + raise ValueError("new_config_data must be a dictionary") + self.config_data.update(new_config_data) + if save_to_file: + with open(self.config_file, 'w') as f: + yaml.dump(self.config_data, f, default_flow_style=False) + def _safe_get(self, dictionary, *keys, default=None): """Safe way to retrieve nested values from dictionaries""" current = dictionary diff --git a/core.py b/core.py index f96529da..4d633a66 100644 --- a/core.py +++ b/core.py @@ -58,7 +58,10 @@ from colorama import Fore, Style from common.config_helper import ConfigHelper -from common.tool import TimeUtils +from common.tool import TimeUtils, Util +from common.command import get_observer_version_by_sql +from common.ob_connector import OBConnector +from collections import OrderedDict class ObdiagHome(object): @@ -151,6 +154,37 @@ def set_context_skip_cluster_conn(self, handler_name, namespace, config): def set_offline_context(self, handler_name, namespace): self.context = HandlerContext(handler_name=handler_name, namespace=namespace, cmd=self.cmds, options=self.options, stdio=self.stdio, inner_config=self.inner_config_manager.config) + def update_obcluster_nodes(self, config): + config_data = config.config_data + cluster_config = config.config_data["obcluster"] + ob_cluster = {"db_host": cluster_config["db_host"], "db_port": cluster_config["db_port"], "tenant_sys": {"user": cluster_config["tenant_sys"]["user"], "password": cluster_config["tenant_sys"]["password"]}} + if Util.check_none_values(ob_cluster, self.stdio): + ob_version = get_observer_version_by_sql(ob_cluster, self.stdio) + obConnetcor = OBConnector(ip=ob_cluster["db_host"], port=ob_cluster["db_port"], username=ob_cluster["tenant_sys"]["user"], password=ob_cluster["tenant_sys"]["password"], stdio=self.stdio, timeout=100) + sql = "select SVR_IP, SVR_PORT, ZONE, BUILD_VERSION from oceanbase.DBA_OB_SERVERS" + if ob_version.startswith("3") or ob_version.startswith("2") or ob_version.startswith("1"): + sql = "select SVR_IP, SVR_PORT, ZONE, BUILD_VERSION from oceanbase.__all_server" + res = obConnetcor.execute_sql(sql) + if len(res) == 
0: + raise Exception("Failed to get the node from sql [{0}], " "please check whether the --config option correct!!!".format(sql)) + host_info_list = [] + for row in res: + host_info = OrderedDict() + host_info["ip"] = row[0] + self.stdio.verbose("get host info: %s", host_info) + host_info_list.append(host_info) + config_data_new = copy(config_data) + if 'servers' in config_data_new['obcluster']: + if not isinstance(config_data_new['obcluster']['servers'], dict): + config_data_new['obcluster']['servers'] = {} + if 'nodes' not in config_data_new['obcluster']['servers'] or not isinstance(config_data_new['obcluster']['servers']['nodes'], list): + config_data_new['obcluster']['servers']['nodes'] = [] + for item in host_info_list: + ip = item['ip'] + config_data_new['obcluster']['servers']['nodes'].append({'ip': ip}) + self.stdio.verbose("update nodes [{0}]] config: %s", host_info) + config.update_config_data(config_data_new) + def get_namespace(self, spacename): if spacename in self.namespaces: namespace = self.namespaces[spacename] @@ -198,6 +232,7 @@ def gather_function(self, function_type, opt): return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("{0} start ...".format(function_type)) + self.update_obcluster_nodes(config) self.set_context(function_type, 'gather', config) timestamp = TimeUtils.get_current_us_timestamp() self.context.set_variable('gather_timestamp', timestamp) @@ -280,6 +315,7 @@ def analyze_fuction(self, function_type, opt): else: self.stdio.print("{0} start ...".format(function_type)) if function_type == 'analyze_log': + self.update_obcluster_nodes(config) self.set_context(function_type, 'analyze', config) handler = AnalyzeLogHandler(self.context) return handler.handle() @@ -288,6 +324,7 @@ def analyze_fuction(self, function_type, opt): handler = AnalyzeLogHandler(self.context) return handler.handle() elif function_type == 'analyze_flt_trace': + self.update_obcluster_nodes(config) self.set_context(function_type, 'analyze', config) handler = AnalyzeFltTraceHandler(self.context) return handler.handle() @@ -328,6 +365,7 @@ def check(self, opts): return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: self.stdio.print("check start ...") + self.update_obcluster_nodes(config) self.set_context('check', 'check', config) obproxy_check_handler = None observer_check_handler = None @@ -368,6 +406,7 @@ def rca_run(self, opts): self._call_stdio('error', 'No such custum config') return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='No such custum config') else: + self.update_obcluster_nodes(config) self.set_context('rca_run', 'rca_run', config) try: handler = RCAHandler(self.context) From e8922fbfcc3bbccfc69ee99eb38f1029d7fb436f Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:25:09 +0800 Subject: [PATCH 55/68] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=8A=82=E7=82=B9?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E7=94=9F=E6=88=90=E7=9A=84=E6=9D=A1=E4=BB=B6?= =?UTF-8?q?=E5=88=A4=E5=AE=9A=20(#403)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update version to 2.4.0 * Remove duplicate scripts * The configuration can be passed entirely through parameters * fix * fix * analyze index space add --config option * fix * Support querying nodes through the tenant connection string and completing the node config * ifx * fix --- core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core.py b/core.py index 
4d633a66..7921cde4 100644 --- a/core.py +++ b/core.py @@ -158,6 +158,8 @@ def update_obcluster_nodes(self, config): config_data = config.config_data cluster_config = config.config_data["obcluster"] ob_cluster = {"db_host": cluster_config["db_host"], "db_port": cluster_config["db_port"], "tenant_sys": {"user": cluster_config["tenant_sys"]["user"], "password": cluster_config["tenant_sys"]["password"]}} + if config_data['obcluster'] and config_data['obcluster']['servers'] and config_data['obcluster']['servers']['nodes']: + return if Util.check_none_values(ob_cluster, self.stdio): ob_version = get_observer_version_by_sql(ob_cluster, self.stdio) obConnetcor = OBConnector(ip=ob_cluster["db_host"], port=ob_cluster["db_port"], username=ob_cluster["tenant_sys"]["user"], password=ob_cluster["tenant_sys"]["password"], stdio=self.stdio, timeout=100) From 2702f59ca04a37ad27d313f2b09da8ddfff5172e Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Wed, 28 Aug 2024 20:15:18 +0800 Subject: [PATCH 56/68] fix rca err (#404) --- common/command.py | 4 ++-- common/ssh_client/remote_client.py | 1 + handler/rca/rca_handler.py | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/common/command.py b/common/command.py index 8d838198..4bd18249 100644 --- a/common/command.py +++ b/common/command.py @@ -189,7 +189,7 @@ def zip_dir(ssh_client, father_dir, zip_dir, stdio=None): Compress files through zip :return: """ - cmd = "zip {father_dir}/{zip_dir}.zip -rm {father_dir}/{zip_dir}".format(father_dir=father_dir, zip_dir=zip_dir) + cmd = "cd {father_dir} && zip {zip_dir}.zip -rm {zip_dir}".format(father_dir=father_dir, zip_dir=zip_dir) ssh_client.exec_cmd(cmd) @@ -198,7 +198,7 @@ def zip_encrypt_dir(ssh_client, zip_password, father_dir, zip_dir, stdio=None): Compress files by encryption :return: """ - cmd = "zip --password {zip_password} {father_dir}/{zip_dir}.zip -rm {father_dir}/{zip_dir}".format(zip_password=zip_password, father_dir=father_dir, zip_dir=zip_dir) + cmd = "cd {father_dir} && zip --password {zip_password} {zip_dir}.zip -rm {zip_dir}".format(zip_password=zip_password, father_dir=father_dir, zip_dir=zip_dir) ssh_client.exec_cmd(cmd) diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py index d61c0821..3cb08197 100644 --- a/common/ssh_client/remote_client.py +++ b/common/ssh_client/remote_client.py @@ -87,6 +87,7 @@ def exec_cmd(self, cmd): if len(stderr.read().decode('utf-8').strip()) > 0: raise Exception(stderr.read().decode('utf-8')) cmd = "sudo {0}".format(cmd) + cmd = cmd.replace("&&", "&& sudo ") self.stdio.verbose('Execute Shell command on server {0}:{1}'.format(self.host_ip, cmd)) stdin, stdout, stderr = self._ssh_fd.exec_command(cmd) err_text = stderr.read() diff --git a/handler/rca/rca_handler.py b/handler/rca/rca_handler.py index 1fe3a14e..35be6529 100644 --- a/handler/rca/rca_handler.py +++ b/handler/rca/rca_handler.py @@ -175,13 +175,15 @@ def execute(self): self.rca_scene.execute() except RCANotNeedExecuteException as e: self.stdio.warn("rca_scene.execute not need execute: {0}".format(e)) - return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, data="rca_scene.execute not need execute: {0}") + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, data={"result": "rca_scene.execute not need execute"}) except Exception as e: - raise Exception("rca_scene.execute err: {0}".format(e)) + self.stdio.error("rca_scene.execute err: {0}".format(e)) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, 
error_data="rca_scene.execute err: {0}".format(e)) try: self.rca_scene.export_result() except Exception as e: - raise Exception("rca_scene.export_result err: {0}".format(e)) + self.stdio.error("rca_scene.export_result err: {0}".format(e)) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="rca_scene.export_result err: {0}".format(e)) self.stdio.print("rca finished. For more details, the result on '" + Fore.YELLOW + self.get_result_path() + Style.RESET_ALL + "' \nYou can get the suggest by '" + Fore.YELLOW + "cat " + self.get_result_path() + "/record" + Style.RESET_ALL + "'") return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.get_result_path(), "record": self.rca_scene.Result.records_data()}) From 45c61aee3d64441f2c1772556bf5c74e209178d0 Mon Sep 17 00:00:00 2001 From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:03:06 +0800 Subject: [PATCH 57/68] Add temp_dir option to support store remote nodes temporary result files (#405) * update version to 2.4.0 * Remove duplicate scripts * The configuration can be passed entirely through parameters * fix * fix * analyze index space add --config option * fix * Support querying nodes through the tenant connection string and completing the node config * ifx * fix * fix * fix * fix --- core.py | 6 ++++++ diag_cmd.py | 5 +++++ handler/analyzer/analyze_flt_trace.py | 5 ++++- handler/analyzer/analyze_log.py | 5 ++++- handler/gather/gather_log.py | 9 +++++++-- handler/gather/gather_obproxy_log.py | 8 ++++++-- handler/gather/gather_scenes.py | 5 +++++ 7 files changed, 37 insertions(+), 6 deletions(-) diff --git a/core.py b/core.py index 7921cde4..510f2e6a 100644 --- a/core.py +++ b/core.py @@ -157,6 +157,12 @@ def set_offline_context(self, handler_name, namespace): def update_obcluster_nodes(self, config): config_data = config.config_data cluster_config = config.config_data["obcluster"] + lst = Util.get_option(self.options, 'config') + if lst: + if any(item.startswith('obcluster.servers.nodes') for item in lst): + return + else: + self.stdio.verbose("You have already provided node information, so there is no need to query node information from the sys tenant") ob_cluster = {"db_host": cluster_config["db_host"], "db_port": cluster_config["db_port"], "tenant_sys": {"user": cluster_config["tenant_sys"]["user"], "password": cluster_config["tenant_sys"]["password"]}} if config_data['obcluster'] and config_data['obcluster']['servers'] and config_data['obcluster']['servers']['nodes']: return diff --git a/diag_cmd.py b/diag_cmd.py index 2564d075..553918c7 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -410,6 +410,7 @@ def __init__(self): self.parser.add_option('--grep', action="append", type='string', help="specify keywords constrain") self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') + self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -433,6 +434,7 @@ def __init__(self): self.parser.add_option('--grep', action="append", type='string', help="specify keywords 
constrain") self.parser.add_option('--encrypt', type='string', help="Whether the returned results need to be encrypted, choices=[true, false]", default="false") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') + self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -662,6 +664,7 @@ def __init__(self): self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . example: 1h.", default='30m') self.parser.add_option('--env', type='string', help='env, eg: "{env1=xxx, env2=xxx}"') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') + self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -728,6 +731,7 @@ def __init__(self): self.parser.add_option('--files', action="append", type='string', help="specify files") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . 
example: 1h.", default='30m') + self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -754,6 +758,7 @@ def __init__(self): self.parser.add_option('--recursion', type='string', help="Maximum number of recursion", default=8) self.parser.add_option('--output', type='string', help="Print the result to the maximum output line on the screen", default=60) self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') + self.parser.add_option('--temp_dir', type='string', help='the dir for temporarily storing files on nodes', default='/tmp') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index 8aa0cacf..807a591b 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -62,6 +62,7 @@ def init_option(self): top_option = Util.get_option(options, 'top') recursion_option = Util.get_option(options, 'recursion') output_option = Util.get_option(options, 'output') + temp_dir_option = Util.get_option(options, 'temp_dir') if store_dir_option is not None: if not os.path.exists(os.path.abspath(store_dir_option)): self.stdio.warn('Warning: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) @@ -82,6 +83,8 @@ def init_option(self): self.max_recursion = int(recursion_option) if output_option: self.output = int(output_option) + if temp_dir_option: + self.gather_ob_log_temporary_dir = temp_dir_option return True def handle(self): @@ -151,7 +154,7 @@ def __handle_from_node(self, node, old_files, local_store_parent_dir): return resp, node_files if not ssh_failed: gather_dir_name = "trace_merged_cache" - gather_dir_full_path = "{0}/{1}".format("/tmp", gather_dir_name) + gather_dir_full_path = "{0}/{1}".format(self.gather_ob_log_temporary_dir, gather_dir_name) mkdir(ssh_client, gather_dir_full_path, self.stdio) if self.is_ssh: self.__get_online_log_file(ssh_client, node, gather_dir_full_path, local_store_dir) diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index 1794f480..f1d13b26 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -78,6 +78,7 @@ def init_option(self): scope_option = Util.get_option(options, 'scope') log_level_option = Util.get_option(options, 'log_level') files_option = Util.get_option(options, 'files') + temp_dir_option = Util.get_option(options, 'temp_dir') if files_option: self.is_ssh = False self.directly_analyze_files = True @@ -121,6 +122,8 @@ def init_option(self): self.scope = scope_option if log_level_option: self.log_level = OBLogLevel().get_log_level(scope_option) + if temp_dir_option: + self.gather_ob_log_temporary_dir = temp_dir_option return True def handle(self): @@ -191,7 +194,7 @@ def __handle_from_node(self, node, local_store_parent_dir): from_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.from_time_str)) 
to_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.to_time_str)) gather_dir_name = "ob_log_{0}_{1}_{2}".format(ssh_client.get_name(), from_datetime_timestamp, to_datetime_timestamp) - gather_dir_full_path = "{0}/{1}".format("/tmp", gather_dir_name) + gather_dir_full_path = "{0}/{1}".format(self.gather_ob_log_temporary_dir, gather_dir_name) mkdir(ssh_client, gather_dir_full_path) log_list, resp = self.__handle_log_list(ssh_client, node, resp) diff --git a/handler/gather/gather_log.py b/handler/gather/gather_log.py index 5246ed16..6c957d56 100644 --- a/handler/gather/gather_log.py +++ b/handler/gather/gather_log.py @@ -79,6 +79,7 @@ def init_option(self): grep_option = Util.get_option(options, 'grep') scope_option = Util.get_option(options, 'scope') encrypt_option = Util.get_option(options, 'encrypt') + temp_dir_option = Util.get_option(options, 'temp_dir') if self.context.get_variable("gather_from", None): from_option = self.context.get_variable("gather_from") if self.context.get_variable("gather_to", None): @@ -91,6 +92,8 @@ def init_option(self): scope_option = self.context.get_variable("gather_scope") if self.context.get_variable("gather_grep", None): grep_option = self.context.get_variable("gather_grep") + if self.context.get_variable("temp_dir", None): + temp_dir_option = self.context.get_variable("temp_dir") if from_option is not None and to_option is not None: try: from_timestamp = TimeUtils.parse_time_str(from_option) @@ -128,6 +131,8 @@ def init_option(self): self.zip_encrypt = True if grep_option: self.grep_options = grep_option + if temp_dir_option: + self.gather_ob_log_temporary_dir = temp_dir_option return True def handle(self): @@ -187,7 +192,7 @@ def __handle_from_node(self, pack_dir_this_command, node): from_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.from_time_str)) to_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.to_time_str)) gather_dir_name = "ob_log_{0}_{1}_{2}".format(ssh_client.get_name(), from_datetime_timestamp, to_datetime_timestamp) - gather_dir_full_path = "{0}/{1}".format("/tmp", gather_dir_name) + gather_dir_full_path = "{0}/{1}".format(self.gather_ob_log_temporary_dir, gather_dir_name) mkdir(ssh_client, gather_dir_full_path, self.stdio) log_list, resp = self.__handle_log_list(ssh_client, node, resp) @@ -319,7 +324,7 @@ def __pharse_log(self, ssh_client, home_path, log_name, gather_path): self.stdio.verbose('grep files, run cmd = [{0}]'.format(grep_cmd)) ssh_client.exec_cmd(grep_cmd) else: - cp_cmd = "cp {log_dir}/{log_name} {gather_path}/{log_name} ".format(gather_path=gather_path, log_name=log_name, log_dir=log_path) + cp_cmd = "cp -p {log_dir}/{log_name} {gather_path}/{log_name} ".format(gather_path=gather_path, log_name=log_name, log_dir=log_path) self.stdio.verbose('copy files, run cmd = [{0}]'.format(cp_cmd)) ssh_client.exec_cmd(cp_cmd) diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index 1550505d..c4b89fc8 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -81,6 +81,7 @@ def init_option(self): grep_option = Util.get_option(options, 'grep') encrypt_option = Util.get_option(options, 'encrypt') scope_option = Util.get_option(options, 'scope') + temp_dir_option = Util.get_option(options, 'temp_dir') if self.context.get_variable("gather_from", None): from_option = self.context.get_variable("gather_from") if 
self.context.get_variable("gather_to", None): @@ -93,7 +94,8 @@ def init_option(self): scope_option = self.context.get_variable("gather_scope") if self.context.get_variable("gather_grep", None): grep_option = self.context.get_variable("gather_grep") - + if self.context.get_variable("temp_dir", None): + temp_dir_option = self.context.get_variable("temp_dir") if from_option is not None and to_option is not None: try: from_timestamp = TimeUtils.parse_time_str(from_option) @@ -131,6 +133,8 @@ def init_option(self): self.zip_encrypt = True if grep_option: self.grep_args = grep_option + if temp_dir_option: + self.gather_log_temporary_dir = temp_dir_option self.stdio.verbose("grep_args:{0}".format(grep_option)) return True @@ -195,7 +199,7 @@ def __handle_from_node(self, node, pack_dir_this_command): from_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.from_time_str)) to_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.to_time_str)) gather_dir_name = "obproxy_log_{0}_{1}_{2}".format(ssh_client.get_name(), from_datetime_timestamp, to_datetime_timestamp) - gather_dir_full_path = "{0}/{1}".format("/tmp", gather_dir_name) + gather_dir_full_path = "{0}/{1}".format(self.gather_log_temporary_dir, gather_dir_name) mkdir(ssh_client, gather_dir_full_path, self.stdio) log_list, resp = self.__handle_log_list(ssh_client, node, resp) diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index 55a17d91..3a25f7e0 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -49,6 +49,7 @@ def __init__(self, context, gather_pack_dir='./', tasks_base_path="~/.obdiag/gat self.task_type = task_type self.variables = {} self.is_inner = is_inner + self.temp_dir = '/tmp' if self.context.get_variable("gather_timestamp", None): self.gather_timestamp = self.context.get_variable("gather_timestamp") else: @@ -70,6 +71,7 @@ def handle(self): if not self.init_config(): self.stdio.error('init config failed') return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="init config failed") + self.context.set_variable('temp_dir', self.temp_dir) self.__init_variables() self.__init_report_path() self.__init_task_names() @@ -182,6 +184,7 @@ def init_option(self): store_dir_option = Util.get_option(options, 'store_dir') env_option = Util.get_option(options, 'env') scene_option = Util.get_option(options, 'scene') + temp_dir_option = Util.get_option(options, 'temp_dir') if from_option is not None and to_option is not None: try: from_timestamp = TimeUtils.parse_time_str(from_option) @@ -218,6 +221,8 @@ def init_option(self): if env_option: env_dict = StringUtils.parse_env(env_option) self.env = env_dict + if temp_dir_option: + self.temp_dir = temp_dir_option return True def __get_sql_result(self): From 85662855bd6e546a4b90195766bcc3e0f50f3f48 Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 3 Sep 2024 17:35:49 +0800 Subject: [PATCH 58/68] add option '-- config', error messages need to be updated (#410) * add option '-- config', error messages need to be updated * remove useless variable --- common/command.py | 4 ++-- common/ssh_client/kubernetes_client.py | 2 +- handler/analyzer/analyze_flt_trace.py | 4 ++-- handler/analyzer/analyze_log.py | 16 +++++----------- handler/gather/gather_obadmin.py | 2 +- handler/gather/gather_obproxy_log.py | 4 ++-- handler/gather/gather_obstack2.py | 6 +++--- handler/gather/gather_perf.py | 4 ++-- 
handler/gather/gather_sysstat.py | 4 ++-- handler/rca/rca_handler.py | 2 +- .../rca/scene/transaction_not_ending_scene.py | 2 +- 11 files changed, 22 insertions(+), 28 deletions(-) diff --git a/common/command.py b/common/command.py index 4bd18249..e132f755 100644 --- a/common/command.py +++ b/common/command.py @@ -239,9 +239,9 @@ def get_observer_version(context): ob_install_dir = nodes[0].get("home_path") observer_version = get_observer_version_by_ssh(sshclient, ob_install_dir, stdio) except Exception as e: - raise Exception("get observer version fail.") + raise Exception("get observer version fail. Please check conf about observer's node or obconnector's info.") if observer_version == "": - raise Exception("get observer version fail.") + raise Exception("get observer version fail. Please check conf about observer's node or obconnector's info.") return observer_version diff --git a/common/ssh_client/kubernetes_client.py b/common/ssh_client/kubernetes_client.py index 5103571d..04833195 100644 --- a/common/ssh_client/kubernetes_client.py +++ b/common/ssh_client/kubernetes_client.py @@ -37,7 +37,7 @@ def __init__(self, context=None, node=None): config.kube_config.load_kube_config(config_file=config_file) self.client = client.CoreV1Api() except Exception as e: - raise Exception("KubernetesClient load_kube_config error. Please check the config file. {0}".format(e)) + raise Exception("KubernetesClient load_kube_config error. Please check the config. {0}".format(e)) def exec_cmd(self, cmd): exec_command = ['/bin/sh', '-c', cmd] diff --git a/handler/analyzer/analyze_flt_trace.py b/handler/analyzer/analyze_flt_trace.py index 807a591b..f71c4779 100644 --- a/handler/analyzer/analyze_flt_trace.py +++ b/handler/analyzer/analyze_flt_trace.py @@ -147,10 +147,10 @@ def __handle_from_node(self, node, old_files, local_store_parent_dir): ssh_client = SshClient(self.context, node) except Exception as e: ssh = None - self.stdio.exception("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) + self.stdio.exception("ssh {0}@{1}: failed, Please check the conf.".format(remote_user, remote_ip)) ssh_failed = True resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) + resp["error"] = "Please check the conf." 
return resp, node_files if not ssh_failed: gather_dir_name = "trace_merged_cache" diff --git a/handler/analyzer/analyze_log.py b/handler/analyzer/analyze_log.py index f1d13b26..f85eabc3 100644 --- a/handler/analyzer/analyze_log.py +++ b/handler/analyzer/analyze_log.py @@ -172,24 +172,18 @@ def handle_from_node(node): def __handle_from_node(self, node, local_store_parent_dir): resp = {"skip": False, "error": ""} - ssh_client = SshClient(self.context, node) + remote_ip = node.get("ip") if self.is_ssh else '127.0.0.1' + node_results = [] try: - node_results = [] - remote_ip = node.get("ip") if self.is_ssh else '127.0.0.1' - remote_user = node.get("ssh_username") - remote_password = node.get("ssh_password") - remote_port = node.get("ssh_port") - remote_private_key = node.get("ssh_key_file") - remote_home_path = node.get("home_path") + ssh_client = SshClient(self.context, node) self.stdio.verbose("Sending Collect Shell Command to node {0} ...".format(remote_ip)) DirectoryUtil.mkdir(path=local_store_parent_dir, stdio=self.stdio) local_store_dir = "{0}/{1}".format(local_store_parent_dir, ssh_client.get_name()) DirectoryUtil.mkdir(path=local_store_dir, stdio=self.stdio) except Exception as e: - ssh_failed = True resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) - raise Exception("Please check the {0}".format(self.config_path)) + resp["error"] = "Please check the node conf about {0}".format(remote_ip) + raise Exception("Please check the node conf about {0}".format(remote_ip)) from_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.from_time_str)) to_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.to_time_str)) diff --git a/handler/gather/gather_obadmin.py b/handler/gather/gather_obadmin.py index 93c6ece2..47e0d271 100644 --- a/handler/gather/gather_obadmin.py +++ b/handler/gather/gather_obadmin.py @@ -177,7 +177,7 @@ def __handle_from_node(self, local_stored_path, node): except Exception as e: self.stdio.error("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) + resp["error"] = "Please check the node conf." return resp if not ssh_failed: mkdir_cmd = "mkdir -p {0}".format(remote_dir_full_path) diff --git a/handler/gather/gather_obproxy_log.py b/handler/gather/gather_obproxy_log.py index c4b89fc8..e512518f 100644 --- a/handler/gather/gather_obproxy_log.py +++ b/handler/gather/gather_obproxy_log.py @@ -190,10 +190,10 @@ def __handle_from_node(self, node, pack_dir_this_command): try: ssh_client = SshClient(self.context, node) except Exception as e: - self.stdio.exception("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) + self.stdio.exception("ssh {0}@{1}: failed, Please check the node conf.".format(remote_user, remote_ip)) ssh_failed = True resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) + resp["error"] = "Please check the node conf." 
return resp if not ssh_failed: from_datetime_timestamp = TimeUtils.timestamp_to_filename_time(TimeUtils.datetime_to_timestamp(self.from_time_str)) diff --git a/handler/gather/gather_obstack2.py b/handler/gather/gather_obstack2.py index 4a922f6b..b57e6f60 100644 --- a/handler/gather/gather_obstack2.py +++ b/handler/gather/gather_obstack2.py @@ -121,10 +121,10 @@ def __handle_from_node(self, local_stored_path, node): try: ssh_client = SshClient(self.context, node) except Exception as e: - self.stdio.exception("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) + self.stdio.exception("ssh {0}@{1}: failed, Please check the node conf.".format(remote_user, remote_ip)) resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) - raise Exception("Please check the {0}".format(self.config_path)) + resp["error"] = "Please check the node conf." + raise Exception("Please check the node conf.") if not is_support_arch(ssh_client): resp["error"] = "remote server {0} arch not support gather obstack".format(ssh_client.get_name()) diff --git a/handler/gather/gather_perf.py b/handler/gather/gather_perf.py index 61d71450..84b217ce 100644 --- a/handler/gather/gather_perf.py +++ b/handler/gather/gather_perf.py @@ -130,10 +130,10 @@ def __handle_from_node(self, node, local_stored_path): try: ssh_client = SshClient(self.context, node) except Exception as e: - self.stdio.exception("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) + self.stdio.exception("ssh {0}@{1}: failed, Please check the node conf.".format(remote_user, remote_ip)) ssh_failed = True resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) + resp["error"] = "Please check the node conf." return resp if not ssh_failed: mkdir(ssh_client, remote_dir_full_path, self.stdio) diff --git a/handler/gather/gather_sysstat.py b/handler/gather/gather_sysstat.py index f6aea7c2..f78d0af8 100644 --- a/handler/gather/gather_sysstat.py +++ b/handler/gather/gather_sysstat.py @@ -131,10 +131,10 @@ def __handle_from_node(self, node, local_stored_path): try: ssh_client = SshClient(self.context, node) except Exception as e: - self.stdio.exception("ssh {0}@{1}: failed, Please check the {2}".format(remote_user, remote_ip, self.config_path)) + self.stdio.exception("ssh {0}@{1}: failed, Please check the node conf.".format(remote_user, remote_ip)) ssh_failed = True resp["skip"] = True - resp["error"] = "Please check the {0}".format(self.config_path) + resp["error"] = "Please check the node conf." if not ssh_failed: mkdir(ssh_client, remote_dir_full_path, self.stdio) diff --git a/handler/rca/rca_handler.py b/handler/rca/rca_handler.py index 35be6529..89fcfe44 100644 --- a/handler/rca/rca_handler.py +++ b/handler/rca/rca_handler.py @@ -74,7 +74,7 @@ def __init__(self, context): ) self.context.set_variable("ob_connector", ob_connector) except Exception as e: - self.stdio.warn("RCAHandler init ob_connector failed: {0}. If the scene need it, please check the conf.yaml".format(str(e))) + self.stdio.warn("RCAHandler init ob_connector failed: {0}. 
If the scene need it, please check the conf".format(str(e))) # build report store_dir = Util.get_option(self.options, "store_dir") if store_dir is None: diff --git a/handler/rca/scene/transaction_not_ending_scene.py b/handler/rca/scene/transaction_not_ending_scene.py index 31cde799..25a17639 100644 --- a/handler/rca/scene/transaction_not_ending_scene.py +++ b/handler/rca/scene/transaction_not_ending_scene.py @@ -128,7 +128,7 @@ def get_scene_info(self): return { "name": "transaction_not_ending", "info_en": "transaction wait timeout error (beta), error_code like -4012", - "info_cn": "事务不结束场景(测试板),目前使用较为复杂", + "info_cn": "事务不结束场景(测试版),目前使用较为复杂", } From 3cb83cf5f0a6bea5c35c64f01003eec8d258a816 Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 3 Sep 2024 17:36:52 +0800 Subject: [PATCH 59/68] update: Change the output method of index space analysis and return ObdiagResult model (#413) * update: Change the output method of index space analysis and return Obdiagresult * Remove redundant judgment ,and msg 'oceanbase' change to 'OceanBase' --- core.py | 3 +- diag_cmd.py | 39 ++++++++++++++----------- handler/analyzer/analyze_index_space.py | 18 +++++++----- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/core.py b/core.py index 510f2e6a..ffc07c30 100644 --- a/core.py +++ b/core.py @@ -360,8 +360,7 @@ def analyze_fuction(self, function_type, opt): elif function_type == 'analyze_index_space': self.set_context(function_type, 'analyze', config) handler = AnalyzeIndexSpaceHandler(self.context) - handler.handle() - return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=handler.execute()) + return handler.handle() else: self._call_stdio('error', 'Not support analyze function: {0}'.format(function_type)) return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data='Not support analyze function: {0}'.format(function_type)) diff --git a/diag_cmd.py b/diag_cmd.py index 553918c7..560202b9 100644 --- a/diag_cmd.py +++ b/diag_cmd.py @@ -265,12 +265,17 @@ def do_command(self): obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path, inner_config_change_map=self.inner_config_change_map, custom_config_env_list=custom_config_env_list) obdiag.set_options(self.opts) obdiag.set_cmds(self.cmds) - ret = self._do_command(obdiag) - exit_code = 0 + ret = None + try: + ret = self._do_command(obdiag) + exit_code = 0 + except Exception as e: + ret = ObdiagResult(code=ObdiagResult.SERVER_ERROR_CODE, error_data="command failed. Please contact OceanBase community. e: {0}".format(e)) + exit_code = 1 # if silent is true ,print ret if ROOT_IO.silent: if isinstance(ret, ObdiagResult) is False: - ROOT_IO.error('The return value of the command is not ObdiagResult. Please contact thebase community. The return value is: {0}'.format(ret)) + ROOT_IO.error('The return value of the command is not ObdiagResult. Please contact OceanBase community. The return value is: {0}'.format(ret)) ret = ObdiagResult(code=ObdiagResult.SERVER_ERROR_CODE, error_data="The return value of the command is not ObdiagResult. Maybe the command not support silent. Please contact thebase community.") ret.set_trace_id(self.trace_id) @@ -402,7 +407,7 @@ def init(self, cmd, args): return self def __init__(self): - super(ObdiagGatherAllCommand, self).__init__('all', 'Gather oceanbase diagnostic info') + super(ObdiagGatherAllCommand, self).__init__('all', 'Gather OceanBase diagnostic info') self.parser.add_option('--from', type='string', help="specify the start of the time range. 
format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--to', type='string', help="specify the end of the time range. format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . example: 1h.", default='30m') @@ -426,7 +431,7 @@ def _do_command(self, obdiag): class ObdiagGatherLogCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagGatherLogCommand, self).__init__('log', 'Gather oceanbase logs from oceanbase machines') + super(ObdiagGatherLogCommand, self).__init__('log', 'Gather OceanBase logs from OceanBase machines') self.parser.add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--to', type='string', help="specify the end of the time range. format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . example: 1h.", default='30m') @@ -450,7 +455,7 @@ def _do_command(self, obdiag): class ObdiagGatherParameterCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagGatherParameterCommand, self).__init__('parameter', 'Gather oceanbase parameters from oceanbase database') + super(ObdiagGatherParameterCommand, self).__init__('parameter', 'Gather OceanBase parameters from OceanBase database') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -467,7 +472,7 @@ def _do_command(self, obdiag): class ObdiagGatherVariableCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagGatherVariableCommand, self).__init__('variable', 'Gather oceanbase variables from oceanbase database') + super(ObdiagGatherVariableCommand, self).__init__('variable', 'Gather OceanBase variables from OceanBase database') self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml')) self.parser.add_option('--config', action="append", type="string", help='config options Format: --config key=value') @@ -722,12 +727,12 @@ def _do_command(self, obdiag): class ObdiagAnalyzeLogCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeLogCommand, self).__init__('log', 'Analyze oceanbase log from online observer machines or offline oceanbase log files') + super(ObdiagAnalyzeLogCommand, self).__init__('log', 'Analyze OceanBase log from online observer machines or offline OceanBase log files') self.parser.add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--to', type='string', help="specify the end of the time range. 
format: 'yyyy-mm-dd hh:mm:ss'") self.parser.add_option('--scope', type='string', help="log type constrains, choices=[observer, election, rootservice, all]", default='all') self.parser.add_option('--grep', action="append", type='string', help="specify keywords constrain") - self.parser.add_option('--log_level', type='string', help="oceanbase logs greater than or equal to this level will be analyze, choices=[DEBUG, TRACE, INFO, WDIAG, WARN, EDIAG, ERROR]") + self.parser.add_option('--log_level', type='string', help="OceanBase logs greater than or equal to this level will be analyze, choices=[DEBUG, TRACE, INFO, WDIAG, WARN, EDIAG, ERROR]") self.parser.add_option('--files', action="append", type='string', help="specify files") self.parser.add_option('--store_dir', type='string', help='the dir to store gather result, current dir by default.', default='./') self.parser.add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: . example: 1h.", default='30m') @@ -751,7 +756,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeFltTraceCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeFltTraceCommand, self).__init__('flt_trace', 'Analyze oceanbase trace.log from online observer machines or offline oceanbase trace.log files') + super(ObdiagAnalyzeFltTraceCommand, self).__init__('flt_trace', 'Analyze OceanBase trace.log from online observer machines or offline OceanBase trace.log files') self.parser.add_option('--flt_trace_id', type='string', help="flt trace id, . format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx") self.parser.add_option('--files', action="append", help="specify files") self.parser.add_option('--top', type='string', help="top leaf span", default=5) @@ -807,7 +812,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeParameterCommand(MajorCommand): def __init__(self): - super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze oceanbase parameters info') + super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze OceanBase parameters info') self.register_command(ObdiagAnalyzeParameterDiffCommand()) self.register_command(ObdiagAnalyzeParameterDefaultCommand()) @@ -831,7 +836,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeVariableCommand(MajorCommand): def __init__(self): - super(ObdiagAnalyzeVariableCommand, self).__init__('variable', 'Analyze oceanbase variables info') + super(ObdiagAnalyzeVariableCommand, self).__init__('variable', 'Analyze OceanBase variables info') self.register_command(ObdiagAnalyzeVariableDiffCommand()) @@ -857,7 +862,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeSQLCommand, self).__init__('sql', 'Analyze oceanbase sql from sql_audit ') + super(ObdiagAnalyzeSQLCommand, self).__init__('sql', 'Analyze OceanBase sql from sql_audit ') self.parser.add_option('--tenant_name', type='string', help="tenant name") self.parser.add_option('--host', type='string', help="tenant connection host") self.parser.add_option('--port', type='string', help="tenant connection port") @@ -886,7 +891,7 @@ def _do_command(self, obdiag): class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from file') + super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze OceanBase sql from file') self.parser.add_option('--host', type='string', 
help="tenant connection host") self.parser.add_option('--port', type='string', help="tenant connection port") self.parser.add_option('--password', type='string', help="tenant connection user password", default='') @@ -910,7 +915,7 @@ def _do_command(self, obdiag): class ObdiagCheckCommand(ObdiagOriginCommand): def __init__(self): - super(ObdiagCheckCommand, self).__init__('check', 'check oceanbase cluster') + super(ObdiagCheckCommand, self).__init__('check', 'check OceanBase cluster') self.parser.add_option('--cases', type='string', help="check observer's cases on package_file") self.parser.add_option('--obproxy_cases', type='string', help="check obproxy's cases on package_file") self.parser.add_option('--store_dir', type='string', help='the dir to store check result, current dir by default.', default='./check_report/') @@ -1057,7 +1062,7 @@ def _do_command(self, obdiag): class ObdiagGatherCommand(MajorCommand): def __init__(self): - super(ObdiagGatherCommand, self).__init__('gather', 'Gather oceanbase diagnostic info') + super(ObdiagGatherCommand, self).__init__('gather', 'Gather OceanBase diagnostic info') self.register_command(ObdiagGatherAllCommand()) self.register_command(ObdiagGatherLogCommand()) self.register_command(ObdiagGatherSysStatCommand()) @@ -1086,7 +1091,7 @@ def __init__(self): class ObdiagAnalyzeCommand(MajorCommand): def __init__(self): - super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze oceanbase diagnostic info') + super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze OceanBase diagnostic info') self.register_command(ObdiagAnalyzeLogCommand()) self.register_command(ObdiagAnalyzeFltTraceCommand()) self.register_command(ObdiagAnalyzeParameterCommand()) diff --git a/handler/analyzer/analyze_index_space.py b/handler/analyzer/analyze_index_space.py index b638e5a8..9fe0b1bc 100644 --- a/handler/analyzer/analyze_index_space.py +++ b/handler/analyzer/analyze_index_space.py @@ -22,6 +22,7 @@ from common.tool import StringUtils, Util from common.ob_connector import OBConnector from common.command import get_observer_version +from result_type import ObdiagResult def translate_byte(B): @@ -93,9 +94,10 @@ def init_option(self): def handle(self): try: - if not self.init_option(): - self.stdio.error('init option failed') - return False + self.init_option() + except Exception as e: + return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data="init option failed: {0}".format(str(e))) + try: # evaluate the space size of the table where the index is located self.stdio.start_loading('start query estimated_table_data_size, please wait some minutes...') sql = "select svr_ip, svr_port, sum(original_size) as estimated_table_size from oceanbase.__all_virtual_tablet_sstable_macro_info where tablet_id in (select tablet_id from oceanbase.__all_virtual_tablet_to_table_history where table_id = {0}) and (svr_ip, svr_port) in (select svr_ip, svr_port from oceanbase.__all_virtual_ls_meta_table where role = 1) group by svr_ip, svr_port;".format( @@ -104,6 +106,8 @@ def handle(self): self.stdio.verbose("execute_sql is {0}".format(sql)) self.estimated_table_data = self.sys_connector.execute_sql_return_cursor_dictionary(sql).fetchall() self.stdio.stop_loading('succeed') + if len(self.estimated_table_data) == 0: + raise Exception("can not find estimated_table_data on __all_virtual_tablet_sstable_macro_info by table id: {0}. 
Please wait major or manually major'".format(self.table_id)) # get the sum of all column lengths sql = "select table_id, sum(data_length) as all_columns_length from oceanbase.__all_virtual_column_history where tenant_id = '{0}' and table_id = '{1}';".format(self.tenant_id, self.table_id) self.stdio.verbose("execute_sql is {0}".format(sql)) @@ -160,11 +164,11 @@ def handle(self): node_result_map["available_disk_space"] = translate_byte(available_disk_space) self.result_map_list.append(node_result_map) self.export_report_table() - except Exception as e: - self.stdio.verbose("analyze index space error: {0}".format(e)) - sys.exit() - finally: self.stdio.verbose("end analyze index space") + return ObdiagResult(ObdiagResult.SUCCESS_CODE, data=self.execute()) + except Exception as e: + self.stdio.error("analyze index space error: {0}".format(e)) + return ObdiagResult(ObdiagResult.SERVER_ERROR_CODE, error_data="analyze index space error: {0}".format(e)) def execute(self): result_map = {} From 6d6d4a64fda073ea4c7d8e10eae679118707a051 Mon Sep 17 00:00:00 2001 From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:56:49 +0800 Subject: [PATCH 60/68] update: when '.yaml' in task and the task not exist, doubly check the task name (#414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update: when '.yaml' in task and the task not exist, doubly check the task's name * change err msg * add stdio.suggest func ,and use it to gather_scenes.py * fix gather scene print_result * change gather_scenes suggest "should" be "can" * update stdio.suggest func --- handler/gather/gather_scenes.py | 5 ++++- stdio.py | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index 3a25f7e0..81b798ef 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -80,6 +80,7 @@ def handle(self): result = self.__get_sql_result() return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path}) else: + self.__print_result() return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path}) def execute(self): @@ -140,7 +141,9 @@ def __init_task_names(self): if yaml_task_data: self.yaml_tasks[item] = yaml_task_data else: - self.stdio.error("Invalid Task :{0}".format(item)) + self.stdio.error("Invalid Task :{0}. Please check the task is exist.".format(item)) + if ".yaml" in item: + self.stdio.suggest("'.yaml' in task :{0}. Maybe you can remove it. 
From 6d6d4a64fda073ea4c7d8e10eae679118707a051 Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Tue, 3 Sep 2024 18:56:49 +0800
Subject: [PATCH 60/68] update: when '.yaml' in task and the task not exist,
 doubly check the task name (#414)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update: when '.yaml' in task and the task not exist, doubly check the task's name

* change err msg

* add stdio.suggest func ,and use it to gather_scenes.py

* fix gather scene print_result

* change gather_scenes suggest "should" be "can"

* update stdio.suggest func
---
 handler/gather/gather_scenes.py |  5 ++++-
 stdio.py                        | 10 +++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py
index 3a25f7e0..81b798ef 100644
--- a/handler/gather/gather_scenes.py
+++ b/handler/gather/gather_scenes.py
@@ -80,6 +80,7 @@ def handle(self):
             result = self.__get_sql_result()
             return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path})
         else:
+            self.__print_result()
             return ObdiagResult(ObdiagResult.SUCCESS_CODE, data={"store_dir": self.report_path})
 
     def execute(self):
@@ -140,7 +141,9 @@ def __init_task_names(self):
                 if yaml_task_data:
                     self.yaml_tasks[item] = yaml_task_data
                 else:
-                    self.stdio.error("Invalid Task :{0}".format(item))
+                    self.stdio.error("Invalid Task :{0}. Please check the task is exist.".format(item))
+                    if ".yaml" in item:
+                        self.stdio.suggest("'.yaml' in task :{0}. Maybe you can remove it. use '--scene={1}'".format(item, item.replace(".yaml", "")))
         # hard code add gather observer.base
         if len(self.code_tasks) > 0:
             yaml_task_base = scene.get_one_yaml_task("observer.base")
diff --git a/stdio.py b/stdio.py
index 5a192ea9..fa547c0c 100644
--- a/stdio.py
+++ b/stdio.py
@@ -26,7 +26,7 @@
 from enum import Enum
 
 from halo import Halo, cursor
-from colorama import Fore
+from colorama import Fore, Style
 from prettytable import PrettyTable
 from progressbar import AdaptiveETA, Bar, SimpleProgress, ETA, FileTransferSpeed, Percentage, ProgressBar
 from types import MethodType
@@ -194,6 +194,10 @@ def __str__(self):
     def info(text):
         return FormtatText(text, Fore.BLUE)
 
+    @staticmethod
+    def suggest(text):
+        return FormtatText(text, Fore.GREEN)
+
     @staticmethod
     def success(text):
         return FormtatText(text, Fore.GREEN)
@@ -366,6 +370,7 @@ class IO(object):
     VERBOSE_LEVEL = 0
     WARNING_PREV = FormtatText.warning('[WARN]')
     ERROR_PREV = FormtatText.error('[ERROR]')
+    SUGGEST_PREV = FormtatText.suggest('[SUGGEST]')
 
     def __init__(self, level, msg_lv=MsgLevel.DEBUG, use_cache=False, track_limit=0, root_io=None, input_stream=SysStdin, output_stream=sys.stdout, error_stream=sys.stdout, silent=False):
         self.silent = silent
@@ -806,6 +811,9 @@ def _flush_cache(self):
     def print(self, msg, *args, **kwargs):
         self._print(MsgLevel.INFO, msg, *args, **kwargs)
 
+    def suggest(self, msg, *args, **kwargs):
+        self._print(MsgLevel.INFO, msg, prev_msg=self.SUGGEST_PREV.format(self.isatty()), *args, **kwargs)
+
     def warn(self, msg, *args, **kwargs):
         self._print(MsgLevel.WARN, msg, prev_msg=self.WARNING_PREV.format(self.isatty()), *args, **kwargs)
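The [SUGGEST] channel introduced above lets a handler pair an error (what went wrong) with a hint (what to do next) without mixing the two in one message. Roughly how gather_scenes.py now uses it, assuming an initialized IO instance named stdio:

    # The error states the problem; the suggestion offers the concrete fix.
    item = "observer.base.yaml"  # a task name mistakenly given with its file suffix
    stdio.error("Invalid Task :{0}. Please check the task is exist.".format(item))
    if ".yaml" in item:
        stdio.suggest("'.yaml' in task :{0}. Maybe you can remove it. use '--scene={1}'".format(item, item.replace(".yaml", "")))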
From ee1af55fe088e0555154cebf18e7c90366cb0f4d Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Tue, 3 Sep 2024 18:57:07 +0800
Subject: [PATCH 61/68] fix: When the reply message cannot be encoded by
 UTF-8, return the original message directly (#411)

* fix: When the reply message cannot be encoded by UTF-8, return the original message directly

* move variable to delete redundance command

* add check stdout is not None

* add check stdout is not None
---
 common/ssh_client/local_client.py  | 12 +++++++++++-
 common/ssh_client/remote_client.py | 10 +++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/common/ssh_client/local_client.py b/common/ssh_client/local_client.py
index b7d849d6..00e03e3e 100644
--- a/common/ssh_client/local_client.py
+++ b/common/ssh_client/local_client.py
@@ -26,13 +26,23 @@ def __init__(self, context=None, node=None):
         super().__init__(context, node)
 
     def exec_cmd(self, cmd):
+        stdout, stderr = None, None
         try:
             self.stdio.verbose("[local host] run cmd = [{0}] on localhost".format(cmd))
             out = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, executable='/bin/bash')
             stdout, stderr = out.communicate()
             if stderr:
                 return stderr.decode('utf-8')
-            return stdout.decode('utf-8')
+            if stdout:
+                return stdout.decode('utf-8')
+            return ""
+        except UnicodeDecodeError as e:
+            self.stdio.warn("[localhost] Execute Shell command UnicodeDecodeError, command=[{0}] Exception = [{1}]".format(cmd, e))
+            if stderr:
+                return str(stderr)
+            if stdout:
+                return str(stdout)
+            return ""
         except Exception as e:
             self.stdio.error("run cmd = [{0}] on localhost, Exception = [{1}]".format(cmd, e))
             raise Exception("[localhost] Execute Shell command failed, command=[{0}] Exception = [{1}]".format(cmd, e))
diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py
index 3cb08197..4f1bb4b8 100644
--- a/common/ssh_client/remote_client.py
+++ b/common/ssh_client/remote_client.py
@@ -65,7 +65,7 @@ def __init__(self, context, node):
             self._ssh_fd.load_system_host_keys()
             self._ssh_fd.connect(hostname=self.host_ip, username=self.username, key_filename=self.key_file, port=self.ssh_port, disabled_algorithms=self._disabled_rsa_algorithms)
         except AuthenticationException:
-            self.password = input("Authentication failed, Input {0}@{1} password:\n".format(self.username, self.ssh_port))
+            self.password = input("Authentication failed, Input {0}@{1} password:\n".format(self.username, self.host_ip))
             self.need_password = True
             self._ssh_fd.connect(hostname=self.host_ip, username=self.username, password=self.password, port=self.ssh_port, disabled_algorithms=self._disabled_rsa_algorithms)
         except Exception as e:
@@ -78,6 +78,7 @@ def __init__(self, context, node):
             self._ssh_fd.connect(hostname=self.host_ip, username=self.username, password=self.password, port=self.ssh_port, disabled_algorithms=self._disabled_rsa_algorithms)
 
     def exec_cmd(self, cmd):
+        stdin, stdout, stderr = None, None, None
         try:
             if self.remote_client_sudo:
                 # check sudo without password
@@ -94,6 +95,13 @@ def exec_cmd(self, cmd):
             if len(err_text):
                 return err_text.decode('utf-8')
             return stdout.read().decode('utf-8')
+        except UnicodeDecodeError as e:
+            self.stdio.warn("[remote] Execute Shell command UnicodeDecodeError, command=[{0}] Exception = [{1}]".format(cmd, e))
+            if stderr:
+                return str(stderr)
+            if stdout:
+                return str(stdout)
+            return ""
         except SSHException as e:
             raise OBDIAGShellCmdException("Execute Shell command on server {0} failed, "
                                           "command=[{1}], exception:{2}".format(self.host_ip, cmd, e))

From 0849afd430538ae41067c7d7ca6f232b819215a8 Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Tue, 3 Sep 2024 18:57:44 +0800
Subject: [PATCH 62/68] fix: Progress bars should not be printed in silent
 mode (#412)

* fix: Progress bars should not be printed in silent mode

* add default for silent

* add default for silent
---
 common/ssh_client/base.py          | 3 +++
 common/ssh_client/remote_client.py | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/common/ssh_client/base.py b/common/ssh_client/base.py
index 16021e7c..81cb184a 100644
--- a/common/ssh_client/base.py
+++ b/common/ssh_client/base.py
@@ -30,6 +30,7 @@ def __init__(self, context, node):
         self.node = node
         self.ssh_type = node.get("ssh_type") or "remote"
         self.client = None
+        self.inner_config_manager = context.inner_config
 
     def exec_cmd(self, cmd):
         raise Exception("the client type is not support exec_cmd")
@@ -53,6 +54,8 @@ def get_ip(self):
         return self.client.get_ip()
 
     def progress_bar(self, transferred, to_be_transferred, suffix=''):
+        if self.inner_config_manager.get("obdiag", default={"logger": {"silent": False}}).get("logger").get("silent"):
+            return
         bar_len = 20
         filled_len = int(round(bar_len * transferred / float(to_be_transferred)))
         percents = round(20.0 * transferred / float(to_be_transferred), 1)
diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py
index 4f1bb4b8..04717dc3 100644
--- a/common/ssh_client/remote_client.py
+++ b/common/ssh_client/remote_client.py
@@ -113,6 +113,8 @@ def download(self, remote_path, local_path):
         self._sftp_client.close()
 
     def progress_bar(self, transferred, to_be_transferred, suffix=''):
+        if self.inner_config_manager.get("obdiag", default={"logger": {"silent": False}}).get("logger").get("silent"):
+            return
         bar_len = 20
         filled_len = int(round(bar_len * transferred / float(to_be_transferred)))
         percents = round(20.0 * transferred / float(to_be_transferred), 1)

From b6a238b4d78287d7391a0e68dec127544b7fb346 Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Wed, 4 Sep 2024 11:33:25 +0800
Subject: [PATCH 63/68] fix: delete dict "default=" on ssh_client package, it
 is useless (#418)

* fix: Progress bars should not be printed in silent mode

* add default for silent

* add default for silent

* delete dict "default=" on ssh_client package

* delete dict "default=" on ssh_client package

* delete dict "default=" on ssh_client package
---
 common/ssh_client/base.py          | 2 +-
 common/ssh_client/remote_client.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/ssh_client/base.py b/common/ssh_client/base.py
index 81cb184a..3d235863 100644
--- a/common/ssh_client/base.py
+++ b/common/ssh_client/base.py
@@ -54,7 +54,7 @@ def get_ip(self):
         return self.client.get_ip()
 
     def progress_bar(self, transferred, to_be_transferred, suffix=''):
-        if self.inner_config_manager.get("obdiag", default={"logger": {"silent": False}}).get("logger").get("silent"):
+        if self.inner_config_manager.get("obdiag", {}).get("logger", {}).get("silent") or False:
             return
         bar_len = 20
         filled_len = int(round(bar_len * transferred / float(to_be_transferred)))
diff --git a/common/ssh_client/remote_client.py b/common/ssh_client/remote_client.py
index 04717dc3..ffcc5587 100644
--- a/common/ssh_client/remote_client.py
+++ b/common/ssh_client/remote_client.py
@@ -113,7 +113,7 @@ def download(self, remote_path, local_path):
         self._sftp_client.close()
 
     def progress_bar(self, transferred, to_be_transferred, suffix=''):
-        if self.inner_config_manager.get("obdiag", default={"logger": {"silent": False}}).get("logger").get("silent"):
+        if self.inner_config_manager.get("obdiag", {}).get("logger", {}).get("silent") or False:
             return
         bar_len = 20
         filled_len = int(round(bar_len * transferred / float(to_be_transferred)))

From 0642f1bac1a94e0ded12b87b062fd70c16e15d63 Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Wed, 4 Sep 2024 11:59:14 +0800
Subject: [PATCH 64/68] when init_option Exception , add use stdio.error the
 msg (#419)

---
 handler/analyzer/analyze_index_space.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/handler/analyzer/analyze_index_space.py b/handler/analyzer/analyze_index_space.py
index 9fe0b1bc..ba2b2419 100644
--- a/handler/analyzer/analyze_index_space.py
+++ b/handler/analyzer/analyze_index_space.py
@@ -96,6 +96,7 @@ def handle(self):
         try:
             self.init_option()
         except Exception as e:
+            self.stdio.error("init option failed: {0}".format(str(e)))
             return ObdiagResult(ObdiagResult.INPUT_ERROR_CODE, error_data="init option failed: {0}".format(str(e)))
         try:
             # evaluate the space size of the table where the index is located
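Before moving on, the lookup style settled on in patches 62 and 63 deserves a note: a plain Python dict rejects the keyword form get(key, default=...) with a TypeError, and a positional default only guards the first level of nesting. Chaining get() with empty-dict fallbacks tolerates a missing key at any level. A standalone sketch (the config shape here is illustrative):

    # get("obdiag", {...}).get("logger").get("silent") still raises
    # AttributeError when "obdiag" exists but has no "logger" key;
    # chaining with {} fallbacks does not.
    inner_config = {"obdiag": {"basic": {}}}  # no "logger" section
    silent = inner_config.get("obdiag", {}).get("logger", {}).get("silent") or False
    assert silent is False  # safe despite the missing "logger" key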
From 240e0a305e82e8b4c1d61562a66b019b65bc0a8a Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Fri, 6 Sep 2024 10:02:20 +0800
Subject: [PATCH 65/68] 2.4.0 release update roadmap (#422)

* update version to 2.4.0

* Remove duplicate scripts

* The configuration can be passed entirely through parameters

* fix

* fix

* analyze index space add --config option

* fix

* Support querying nodes through the tenant connection string and completing the node config

* ifx

* fix

* fix

* fix

* fix

* 2.4.0 release update roadmap

* fix
---
 README-CN.md | 2 +-
 README.md    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README-CN.md b/README-CN.md
index 902c56aa..93e19fbf 100644
--- a/README-CN.md
+++ b/README-CN.md
@@ -78,7 +78,7 @@ obdiag 期望构建一个开放的社区，我们欢迎任何形式的贡献，
 |2.1.0|2024.04| 2024.05.13|<br>  • 根因分析场景扩展<br>  • 新增 ash 报告 采集<br>|
 |2.2.0|2024.05| 2024.06.14 |<br>  • 根因分析场景扩展<br>  • 巡检场景扩展<br>|
 |2.3.0|2024.06| 2024.07.24 |<br>  • 根因分析场景扩展<br>  • 新增基础采集功能: tabledump<br>  • 新增参数/变量比对分析功能<br>  • 执行底座改造，支持 k8s 部署的 OceanBase 集群诊断<br>|
-|2.4.0|2024.07| - |<br>  • 易用性改造<br>  • 索引空间分析<br>|
+|2.4.0|2024.07| 2024.09.03 |<br>  • 易用性改造<br>  • 索引空间分析<br>|
 |2.5.0|2024.09| - |<br>  • 场景化一键展示集群诊断信息功能<br>  • 队列积压分析<br>|
 |3.0.0|2024.10| - |<br>  • SQL 诊断<br>  • 支持 OMS 诊断<br>|
 |3.1.0|2024.11| - |<br>  • 根因分析场景扩展<br>  • 支持巡检报告比对<br>|
diff --git a/README.md b/README.md
index 38d7a9e1..3d9686bd 100644
--- a/README.md
+++ b/README.md
@@ -82,8 +82,8 @@ obdiag envisions an open community. We welcome your contributions in any form:
 |2.1.0|2024.04| 2024.05.13|<br>  • Root Cause Analysis Scenario Expansion<br>  • Gather ash report<br>|
 |2.2.0|2024.05| 2024.06.14 |<br>  • Root Cause Analysis Scenario Expansion<br>  • Check Scenario Expansion<br>|
 |2.3.0|2024.06| 2024.07.24 |<br>  • Root Cause Analysis Scenario Expansion<br>  • Added basic gather feature: tabledump<br>  • Added parameter/variable gather and analyze feature<br>  • Execute infrastructure modifications to support diagnostics for OceanBase clusters deployed on Kubernetes (k8s)<br>|
-|2.4.0|2024.07| - |<br>  • usability improvement<br>  • Index Space Size Analysis<br>|
-|2.5.0|2024.08| - |<br>  • Cluster Diagnosis Information Display<br>  • Queue Analysis<br>|
+|2.4.0|2024.07| 2024.09.03 |<br>  • usability improvement<br>  • Index Space Size Analysis<br>|
+|2.5.0|2024.09| - |<br>  • Cluster Diagnosis Information Display<br>  • Queue Analysis<br>|
 |3.0.0|2024.10| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • SQL Diagnosis<br>|
 |3.1.0|2024.11| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • Supporting Comparative Functionality for Patrol Inspection Reports<br>|
 |3.2.0|2024.12| - |<br>  • Root Cause Analysis Scenario Expansion<br>  • SQL Diagnosis Phase II, Supporting Root Cause Analysis for SQL problems<br>|

From 98c74f9349f6907392251ca328dbe13fb039eef6 Mon Sep 17 00:00:00 2001
From: "jingshun.tq" <35712518+Teingi@users.noreply.github.com>
Date: Mon, 9 Sep 2024 10:12:41 +0800
Subject: [PATCH 66/68] change version to 2.5.0 (#424)

---
 rpm/build.sh                       | 2 +-
 rpm/oceanbase-diagnostic-tool.spec | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rpm/build.sh b/rpm/build.sh
index d56d3c4b..726cfb3f 100755
--- a/rpm/build.sh
+++ b/rpm/build.sh
@@ -2,7 +2,7 @@
 python_bin='python'
 W_DIR=`pwd`
-VERSION=${VERSION:-'2.4.0'}
+VERSION=${VERSION:-'2.5.0'}
 
 
 function python_version()
diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec
index c3621909..ae8fc600 100644
--- a/rpm/oceanbase-diagnostic-tool.spec
+++ b/rpm/oceanbase-diagnostic-tool.spec
@@ -1,5 +1,5 @@
 Name: oceanbase-diagnostic-tool
-Version:2.4.0
+Version:2.5.0
 Release: %(echo $RELEASE)%{?dist}
 Summary: oceanbase diagnostic tool program
 Group: Development/Tools

From 8589cc5316181ba46a33093aaacf11f13eb8e81e Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Mon, 9 Sep 2024 16:27:02 +0800
Subject: [PATCH 67/68] fix option '-c' (#425)

---
 diag_cmd.py                        | 7 +++++++
 rpm/build.sh                       | 2 +-
 rpm/oceanbase-diagnostic-tool.spec | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/diag_cmd.py b/diag_cmd.py
index 560202b9..c52b9b3b 100644
--- a/diag_cmd.py
+++ b/diag_cmd.py
@@ -261,6 +261,13 @@ def do_command(self):
             ROOT_IO.verbose('cmd: %s' % self.prev_cmd)
             ROOT_IO.verbose('opts: %s' % self.opts)
             config_path = os.path.expanduser('~/.obdiag/config.yml')
+            custom_config = Util.get_option(self.opts, 'c')
+            if custom_config:
+                if os.path.exists(os.path.abspath(custom_config)):
+                    config_path = custom_config
+                else:
+                    ROOT_IO.error('The option you provided with -c: {0} is a non-existent configuration file path.'.format(custom_config))
+                    return
             custom_config_env_list = Util.get_option(self.opts, 'config')
             obdiag = ObdiagHome(stdio=ROOT_IO, config_path=config_path, inner_config_change_map=self.inner_config_change_map, custom_config_env_list=custom_config_env_list)
             obdiag.set_options(self.opts)
diff --git a/rpm/build.sh b/rpm/build.sh
index 726cfb3f..d56d3c4b 100755
--- a/rpm/build.sh
+++ b/rpm/build.sh
@@ -2,7 +2,7 @@
 python_bin='python'
 W_DIR=`pwd`
-VERSION=${VERSION:-'2.5.0'}
+VERSION=${VERSION:-'2.4.0'}
 
 
 function python_version()
diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec
index ae8fc600..c3621909 100644
--- a/rpm/oceanbase-diagnostic-tool.spec
+++ b/rpm/oceanbase-diagnostic-tool.spec
@@ -1,5 +1,5 @@
 Name: oceanbase-diagnostic-tool
-Version:2.5.0
+Version:2.4.0
 Release: %(echo $RELEASE)%{?dist}
 Summary: oceanbase diagnostic tool program
 Group: Development/Tools
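The '-c' handling in patch 67 keeps the default ~/.obdiag/config.yml unless an override is supplied, and fails fast on a nonexistent override instead of silently falling back. The core logic, reduced to a standalone sketch (the optparse/Util plumbing is omitted, and the exception stands in for the ROOT_IO.error call plus early return):

    import os

    def resolve_config_path(custom_config, default='~/.obdiag/config.yml'):
        # Keep the default unless the user passed -c; reject a bad override outright.
        config_path = os.path.expanduser(default)
        if custom_config:
            if os.path.exists(os.path.abspath(custom_config)):
                return custom_config
            raise FileNotFoundError('The option you provided with -c: {0} is a non-existent configuration file path.'.format(custom_config))
        return config_path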
From e4aba6410cc00b233780773f08b70391653fdd92 Mon Sep 17 00:00:00 2001
From: xuyan wang <35394786+wayyoungboy@users.noreply.github.com>
Date: Tue, 10 Sep 2024 19:55:25 +0800
Subject: [PATCH 68/68] update version 2.5.0 (#427)

---
 rpm/build.sh                       | 2 +-
 rpm/oceanbase-diagnostic-tool.spec | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rpm/build.sh b/rpm/build.sh
index d56d3c4b..726cfb3f 100755
--- a/rpm/build.sh
+++ b/rpm/build.sh
@@ -2,7 +2,7 @@
 python_bin='python'
 W_DIR=`pwd`
-VERSION=${VERSION:-'2.4.0'}
+VERSION=${VERSION:-'2.5.0'}
 
 
 function python_version()
diff --git a/rpm/oceanbase-diagnostic-tool.spec b/rpm/oceanbase-diagnostic-tool.spec
index c3621909..ae8fc600 100644
--- a/rpm/oceanbase-diagnostic-tool.spec
+++ b/rpm/oceanbase-diagnostic-tool.spec
@@ -1,5 +1,5 @@
 Name: oceanbase-diagnostic-tool
-Version:2.4.0
+Version:2.5.0
 Release: %(echo $RELEASE)%{?dist}
 Summary: oceanbase diagnostic tool program
 Group: Development/Tools