diff --git a/.github/workflows/TestCITools.yml b/.github/workflows/TestCITools.yml index 5672617..4989044 100644 --- a/.github/workflows/TestCITools.yml +++ b/.github/workflows/TestCITools.yml @@ -61,3 +61,13 @@ jobs: duckdb_version: v1.1.3 exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64' extra_toolchains: 'rust' + + code-quality-check: + name: Code quality check + uses: ./.github/workflows/_extension_code_quality.yml + with: + override_repository: duckdb/extension-template + override_ref: main + duckdb_version: v1.1.3 + override_ci_tools_repository: ${{ github.repository }} + ci_tools_version: ${{ github.sha }} diff --git a/.github/workflows/_extension_code_quality.yml b/.github/workflows/_extension_code_quality.yml new file mode 100644 index 0000000..c7669ea --- /dev/null +++ b/.github/workflows/_extension_code_quality.yml @@ -0,0 +1,89 @@ +name: CodeQuality + +on: + workflow_call: + inputs: + explicit_checks: + required: false + type: string + duckdb_version: + required: true + type: string + ci_tools_version: + required: true + type: string + # Override the repo for the CI tools (for testing CI tools itself) + override_ci_tools_repository: + required: false + type: string + default: "duckdb/extension-ci-tools" + override_repository: + required: false + type: string + default: "" + # The git ref used for the override_repository + override_ref: + required: false + type: string + default: "" + + +jobs: + format-check: + name: Format Check + runs-on: ubuntu-20.04 + + env: + CC: gcc-10 + CXX: g++-10 + GEN: ninja + + steps: + - uses: actions/checkout@v4 + name: Checkout override repository + if: ${{inputs.override_repository != ''}} + with: + repository: ${{ inputs.override_repository }} + ref: ${{ inputs.override_ref }} + fetch-depth: 0 + submodules: 'true' + + - uses: actions/checkout@v4 + name: Checkout current repository + if: ${{inputs.override_repository == ''}} + with: + fetch-depth: 0 + submodules: 'true' + + - uses: actions/checkout@v4 + name: Checkout Extension CI tools + with: + path: 'extension-ci-tools' + ref: ${{ inputs.ci_tools_version }} + repository: ${{ inputs.override_ci_tools_repository }} + + - name: Checkout DuckDB to version + if: ${{inputs.duckdb_version != ''}} + run: | + DUCKDB_GIT_VERSION=${{ inputs.duckdb_version }} make set_duckdb_version + + - name: Install + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build clang-format-11 && sudo pip3 install cmake-format black cxxheaderparser pcpp + + - name: List Installed Packages + shell: bash + run: pip3 freeze + + - name: Format Check + shell: bash + run: | + clang-format --version + clang-format --dump-config + black --version + make format-check-silent + + - name: Generated Check + shell: bash + run: | + git diff --ignore-submodules --exit-code diff --git a/makefiles/duckdb_extension.Makefile b/makefiles/duckdb_extension.Makefile index 47f32b4..ff793d3 100644 --- a/makefiles/duckdb_extension.Makefile +++ b/makefiles/duckdb_extension.Makefile @@ -132,9 +132,15 @@ wasm_threads: emmake make -j8 -Cbuild/wasm_threads #### Misc -format: - find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i - cmake-format -i CMakeLists.txt +# Rule to fix formatting +format-check: + python extension-ci-tools/scripts/format_extension.py + +format-check-silent: + python extension-ci-tools/scripts/format_extension.py --all --check --silent + +format-fix: + python extension-ci-tools/scripts/format_extension.py --all --fix --noconfirm update: git submodule update --remote --merge diff --git a/scripts/format_extension.py b/scripts/format_extension.py new file mode 100644 index 0000000..cce4eea --- /dev/null +++ b/scripts/format_extension.py @@ -0,0 +1,400 @@ +#!/usr/bin/python + +# this script is used to format the source directory + +import os +import time +import sys +import inspect +import subprocess +import difflib +import re +import concurrent.futures + +def open_utf8(fpath, flags): + import sys + + if sys.version_info[0] < 3: + return open(fpath, flags) + else: + return open(fpath, flags, encoding="utf8") + +try: + ver = subprocess.check_output(('black', '--version'), text=True) + if int(ver.split(' ')[1].split('.')[0]) < 24: + print('you need to run `pip install "black>=24"`', ver) + exit(-1) +except Exception as e: + print('you need to run `pip install "black>=24"`', e) + exit(-1) + +try: + ver = subprocess.check_output(('clang-format', '--version'), text=True) + if '11.' not in ver: + print('you need to run `pip install clang_format==11.0.1 - `', ver) + exit(-1) +except Exception as e: + print('you need to run `pip install clang_format==11.0.1 - `', e) + exit(-1) + +cpp_format_command = 'clang-format --sort-includes=0 -style=file' +cmake_format_command = 'cmake-format' + +try: + subprocess.check_output(('cmake-format', '--version'), text=True) +except Exception as e: + print('you need to run `pip install cmake-format`', e) + exit(-1) + +extensions = [ + '.cpp', + '.ipp', + '.c', + '.hpp', + '.h', + '.cc', + '.hh', + 'CMakeLists.txt', + '.test', + '.test_slow', + '.test_coverage', + '.benchmark', + '.py', + '.java', +] +formatted_directories = ['src', 'test', 'scripts'] +ignored_files = [] +ignored_directories = ['__pycache__', '.pytest_cache'] +format_all = False +check_only = True +confirm = True +silent = False +force = False + + +def print_usage(): + print("Usage: python scripts/format.py [revision|--all] [--check|--fix] [--force]") + print( + " [revision] is an optional revision number, all files that changed since that revision will be formatted (default=HEAD)" + ) + print(" if [revision] is set to --all, all files will be formatted") + print(" --check only prints differences, --fix also fixes the files (--check is default)") + exit(1) + + +if len(sys.argv) == 1: + revision = "HEAD" +elif len(sys.argv) >= 2: + revision = sys.argv[1] +else: + print_usage() + +if len(sys.argv) > 2: + for arg in sys.argv[2:]: + if arg == '--check': + check_only = True + elif arg == '--fix': + check_only = False + elif arg == '--noconfirm': + confirm = False + elif arg == '--confirm': + confirm = True + elif arg == '--silent': + silent = True + elif arg == '--force': + force = True + else: + print_usage() + +if revision == '--all': + format_all = True + + +def file_is_ignored(full_path): + if os.path.basename(full_path) in ignored_files: + return True + dirnames = os.path.sep.join(full_path.split(os.path.sep)[:-1]) + for ignored_directory in ignored_directories: + if ignored_directory in dirnames: + return True + return False + + +def can_format_file(full_path): + global extensions, formatted_directories, ignored_files + if not os.path.isfile(full_path): + return False + fname = full_path.split(os.path.sep)[-1] + found = False + # check file extension + for ext in extensions: + if full_path.endswith(ext): + found = True + break + if not found: + return False + # check ignored files + if file_is_ignored(full_path): + return False + # now check file directory + for dname in formatted_directories: + if full_path.startswith(dname): + return True + return False + + +action = "Formatting" +if check_only: + action = "Checking" + + +def get_changed_files(revision): + proc = subprocess.Popen(['git', 'diff', '--name-only', revision], stdout=subprocess.PIPE) + files = proc.stdout.read().decode('utf8').split('\n') + changed_files = [] + for f in files: + if not can_format_file(f): + continue + if file_is_ignored(f): + continue + changed_files.append(f) + return changed_files + + +if os.path.isfile(revision): + print(action + " individual file: " + revision) + changed_files = [revision] +elif os.path.isdir(revision): + print(action + " files in directory: " + revision) + changed_files = [os.path.join(revision, x) for x in os.listdir(revision)] + + print("Changeset:") + for fname in changed_files: + print(fname) +elif not format_all: + if revision == 'main': + # fetch new changes when comparing to the master + os.system("git fetch origin main:main") + print(action + " since branch or revision: " + revision) + changed_files = get_changed_files(revision) + if len(changed_files) == 0: + print("No changed files found!") + exit(0) + + print("Changeset:") + for fname in changed_files: + print(fname) +else: + print(action + " all files") + +if confirm and not check_only: + print("The files listed above will be reformatted.") + result = input("Continue with changes (y/n)?\n") + if result != 'y': + print("Aborting.") + exit(0) + +format_commands = { + '.cpp': cpp_format_command, + '.ipp': cpp_format_command, + '.c': cpp_format_command, + '.hpp': cpp_format_command, + '.h': cpp_format_command, + '.hh': cpp_format_command, + '.cc': cpp_format_command, + '.txt': cmake_format_command, + '.py': 'black --quiet - --skip-string-normalization --line-length 120 --stdin-filename', + '.java': cpp_format_command, +} + +difference_files = [] + +header_top = "//===----------------------------------------------------------------------===//\n" +header_top += "// DuckDB\n" + "//\n" +header_bottom = "//\n" + "//\n" +header_bottom += "//===----------------------------------------------------------------------===//\n\n" +base_dir = os.path.join(os.getcwd(), 'src/include') + + +def get_formatted_text(f, full_path, directory, ext): + if not can_format_file(full_path): + if not force: + print( + "File " + + full_path + + " is not normally formatted - but attempted to format anyway. Use --force if formatting is desirable" + ) + exit(1) + if f == 'list.hpp': + # fill in list file + file_list = [ + os.path.join(dp, f) + for dp, dn, filenames in os.walk(directory) + for f in filenames + if os.path.splitext(f)[1] == '.hpp' and not f.endswith("list.hpp") + ] + file_list = [x.replace('src/include/', '') for x in file_list] + file_list.sort() + result = "" + for x in file_list: + result += '#include "%s"\n' % (x) + return result + + if ext == ".hpp" and directory.startswith("src/include"): + with open_utf8(full_path, 'r') as f: + lines = f.readlines() + + # format header in files + header_middle = "// " + os.path.relpath(full_path, base_dir) + "\n" + text = header_top + header_middle + header_bottom + is_old_header = True + for line in lines: + if not (line.startswith("//") or line.startswith("\n")) and is_old_header: + is_old_header = False + if not is_old_header: + text += line + + if ext == '.test' or ext == '.test_slow' or ext == '.test_coverage' or ext == '.benchmark': + f = open_utf8(full_path, 'r') + lines = f.readlines() + f.close() + + found_name = False + found_group = False + group_name = full_path.split('/')[-2] + new_path_line = '# name: ' + full_path + '\n' + new_group_line = '# group: [' + group_name + ']' + '\n' + found_diff = False + # Find description. + found_description = False + for line in lines: + if line.lower().startswith('# description:') or line.lower().startswith('#description:'): + if found_description: + print("Error formatting file " + full_path + ", multiple lines starting with # description found") + exit(1) + found_description = True + new_description_line = '# description: ' + line.split(':', 1)[1].strip() + '\n' + # Filter old meta. + meta = ['#name:', '# name:', '#description:', '# description:', '#group:', '# group:'] + lines = [line for line in lines if not any(line.lower().startswith(m) for m in meta)] + # Clean up empty leading lines. + while lines and not lines[0].strip(): + lines.pop(0) + # Ensure header is prepended. + header = [new_path_line] + if found_description: + header.append(new_description_line) + header.append(new_group_line) + header.append('\n') + return ''.join(header + lines) + proc_command = format_commands[ext].split(' ') + [full_path] + proc = subprocess.Popen( + proc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open(full_path) if ext == '.py' else None + ) + new_text = proc.stdout.read().decode('utf8') + stderr = proc.stderr.read().decode('utf8') + if len(stderr) > 0: + print(os.getcwd()) + print("Failed to format file " + full_path) + print(' '.join(proc_command)) + print(stderr) + exit(1) + new_text = new_text.replace('\r', '') + new_text = re.sub(r'\n*$', '', new_text) + return new_text + '\n' + + +def file_is_generated(text): + if '// This file is automatically generated by scripts/' in text: + return True + return False + + +def format_file(f, full_path, directory, ext): + global difference_files + with open_utf8(full_path, 'r') as f: + old_text = f.read() + # do not format auto-generated files + if file_is_generated(old_text) and ext != '.py': + return + old_lines = old_text.split('\n') + new_text = get_formatted_text(f, full_path, directory, ext) + if ext in ('.cpp', '.hpp'): + new_text = new_text.replace('ARGS &&...args', 'ARGS &&... args') + if check_only: + new_lines = new_text.split('\n') + old_lines = [x for x in old_lines if '...' not in x] + new_lines = [x for x in new_lines if '...' not in x] + diff_result = difflib.unified_diff(old_lines, new_lines) + total_diff = "" + for diff_line in diff_result: + total_diff += diff_line + "\n" + total_diff = total_diff.strip() + + if len(total_diff) > 0: + print("----------------------------------------") + print("----------------------------------------") + print("Found differences in file " + full_path) + print("----------------------------------------") + print("----------------------------------------") + print(total_diff) + difference_files.append(full_path) + else: + tmpfile = full_path + ".tmp" + with open_utf8(tmpfile, 'w+') as f: + f.write(new_text) + os.rename(tmpfile, full_path) + + +def format_directory(directory): + files = os.listdir(directory) + files.sort() + + def process_file(f): + full_path = os.path.join(directory, f) + if os.path.isdir(full_path): + if f in ignored_directories or full_path in ignored_directories: + return + if not silent: + print(full_path) + format_directory(full_path) + elif can_format_file(full_path): + format_file(f, full_path, directory, '.' + f.split('.')[-1]) + + # Create thread for each file + with concurrent.futures.ThreadPoolExecutor() as executor: + threads = [executor.submit(process_file, f) for f in files] + # Wait for all tasks to complete + concurrent.futures.wait(threads) + + +if format_all: + try: + os.system(cmake_format_command.replace("${FILE}", "CMakeLists.txt")) + except: + pass + + for direct in formatted_directories: + format_directory(direct) + +else: + for full_path in changed_files: + splits = full_path.split(os.path.sep) + fname = splits[-1] + dirname = os.path.sep.join(splits[:-1]) + ext = '.' + full_path.split('.')[-1] + format_file(fname, full_path, dirname, ext) + +if check_only: + if len(difference_files) > 0: + print("") + print("") + print("") + print("Failed format-check: differences were found in the following files:") + for fname in difference_files: + print("- " + fname) + print('Run "make format-fix" to fix these differences automatically') + exit(1) + else: + print("Passed format-check") + exit(0)