diff --git a/.cmake-format.py b/.cmake-format.py index 6082af2c1..f37a15522 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + with section("format"): line_width = 120 tab_size = 2 diff --git a/.github/actions/build-khiops/action.yml b/.github/actions/build-khiops/action.yml index bf3aeff28..fd4368976 100644 --- a/.github/actions/build-khiops/action.yml +++ b/.github/actions/build-khiops/action.yml @@ -13,7 +13,7 @@ inputs: runs: using: composite steps: - - name: Setup MPI (windows) + - name: Setup MPI (Windows) if: runner.os == 'Windows' uses: mpi4py/setup-mpi@v1 - name: Setup MPI (macOS) @@ -29,9 +29,16 @@ runs: sudo apt-get install openmpi-bin libopenmpi-dev sudo update-alternatives --set mpi /usr/bin/mpicc.openmpi sudo update-alternatives --set mpirun /usr/bin/mpirun.openmpi - - name: Setup Ninja - uses: ashutoshvarma/setup-ninja@v1.1 - - name: Load Visual C++ Environment Variables (Windows only) + - name: Setup Ninja (Linux) + if: runner.os == 'Linux' + shell: bash + run: sudo apt-get install ninja-build + - name: Setup Ninja (macOS) + if: runner.os == 'macOS' + shell: bash + run: brew install ninja + # This step is required to have ninja and cmake in the path for windows + - name: Load Visual C++ Environment Variables (Windows) if: runner.os == 'Windows' shell: cmd run: | diff --git a/.github/actions/check-tag-version/action.yml b/.github/actions/check-tag-version/action.yml new file mode 100644 index 000000000..4b5bc5a33 --- /dev/null +++ b/.github/actions/check-tag-version/action.yml @@ -0,0 +1,17 @@ +--- +name: Check tag/version consistency +description: Check that the Khiops source version matches a tag +runs: + using: composite + steps: + - name: Check that the Khiops source version 
matches the input tag + shell: bash + run: |- + SOURCE_VERSION=$(./scripts/khiops-version) + if [[ "$SOURCE_VERSION" != "${{ github.ref_name }}" ]] + then + echo "::error::Incompatible source version '$SOURCE_VERSION' with tag '${{ github.ref_name }}'. Either change the tag to match the source version or modify the KHIOPS_VERSION macro in src/Learning/KWUtils/KWKhiopsVersion.h." + exit 1 + else + echo "Tag '${{ github.ref_name }}' OK" + fi diff --git a/.github/actions/test-khiops-install/action.yml b/.github/actions/test-khiops-install/action.yml index be2dc3c47..d1e9a80a3 100644 --- a/.github/actions/test-khiops-install/action.yml +++ b/.github/actions/test-khiops-install/action.yml @@ -1,27 +1,100 @@ --- name: Test Khiops Installation -description: Check the outputs of Khiops in the console +description: Test khiops' scripts (khiops_env, khiops and khiops_coclustering) runs: using: composite - # Test if the output of Khiops in the console is as expected (no mpi or java errors) - # We expect only one line in stdout+stderr when running 'khiops -v' steps: - - name: Print status for debugging - shell: bash - # We force the process number to 4 because the runner has only 2 proc. - # With 2 proc khiops-env switches to serial and mpi is not used. 
+ - name: Test that the executables are installed (Linux/macOS) + # We call khiops_env a first time to test the calling of _khiopsgetprocnumber + # then we play with KHIOPS_PROC_NUMBER: + # - with KHIOPS_PROC_NUMBER=2, khiops should not use MPI, we check that KHIOPS_MPI_COMMAND is empty + # - with KHIOPS_PROC_NUMBER=8, khiops should use MPI, we check that KHIOPS_MPI_COMMAND is not empty and KHIOPS_MPI_ERROR is empty + # # The env var OMPI_MCA_rmaps_base_oversubscribe is for openmpi it corresponds to # the flag --oversubscribe (Nodes are allowed to be oversubscribed) + # KHIOPS_MPI_VERBOSE is set to true to detect errors from MPI + if: runner.os != 'Windows' + shell: bash env: - KHIOPS_PROC_NUMBER: 4 OMPI_MCA_rmaps_base_oversubscribe: true + KHIOPS_MPI_VERBOSE: true run: | - khiops-env --env - - name: Check Khiops output + echo + echo --- Khiops environment with KHIOPS_PROC_NUMBER not set + khiops_env --env + echo + export KHIOPS_PROC_NUMBER=2 + echo --- Khiops environment with KHIOPS_PROC_NUMBER=$KHIOPS_PROC_NUMBER + source khiops_env --env + if [ ! -z "$KHIOPS_MPI_COMMAND" ]; + then + echo "::error::MPI is used even though there are only 2 procs available" + false + fi + echo + export KHIOPS_PROC_NUMBER=8 + echo --- Khiops environment with KHIOPS_PROC_NUMBER=$KHIOPS_PROC_NUMBER + source khiops_env --env + if [ ! -z "$KHIOPS_MPI_ERROR" ]; + then + echo "::error::Unexpected MPI error: $KHIOPS_MPI_ERROR" + false + fi + if [ -z "$KHIOPS_MPI_COMMAND" ]; + then + echo "::error::KHIOPS_MPI_COMMAND is not set" + false + fi + echo + echo --- Khiops status + khiops -s + echo + echo --- Khiops Coclustering status + khiops_coclustering -s + - name: Test that the executables are installed (windows) + if: runner.os == 'Windows' + shell: cmd /C call {0} + run: | + echo. + echo --- Khiops environment with KHIOPS_PROC_NUMBER not set + call khiops_env --env + echo. 
+ set KHIOPS_PROC_NUMBER=2 + echo --- Khiops environment with KHIOPS_PROC_NUMBER=%KHIOPS_PROC_NUMBER% + call khiops_env --env + call khiops_env + if not "%KHIOPS_MPI_COMMAND%". == "". ( + echo "::error::MPI is used even though there are only 2 procs available" + exit 1 + ) + set KHIOPS_PROC_NUMBER=8 + echo. + echo --- Khiops environment with KHIOPS_PROC_NUMBER=%KHIOPS_PROC_NUMBER% + call khiops_env --env + call khiops_env + if not "%KHIOPS_MPI_ERROR%".=="". ( + echo "::error::Unexpected MPI error: %KHIOPS_MPI_ERROR%" + exit 1 + ) + if "%KHIOPS_MPI_COMMAND%". == "". ( + echo "::error::KHIOPS_MPI_COMMAND is not set" + exit 1 + ) + echo. + echo --- Khiops status + khiops -s + echo. + echo --- Khiops Coclustering status + khiops_coclustering -s + - name: Check Khiops output [UNIXes] + # Test if the output of Khiops in the console is as expected (no mpi or java errors) + # We expect only one line in stdout+stderr when running 'khiops -v' + if: runner.os != 'Windows' shell: bash env: KHIOPS_PROC_NUMBER: 4 OMPI_MCA_rmaps_base_oversubscribe: true + KHIOPS_MPI_VERBOSE: true run: | khiops -s khiops -v &> output @@ -31,7 +104,20 @@ runs: echo "::error::Unexpected output in khiops scripts" false fi - - name: Check Khiops coclustering output + - name: Check Khiops output [Windows] + if: runner.os == 'Windows' + shell: cmd /C call {0} + env: + KHIOPS_PROC_NUMBER: 4 + run: | + khiops -s + for /f %%i in (' khiops -v ^| find /c /v "" ') do set "LINE_NUMBER=%%i" + if not "%LINE_NUMBER%" == "1" ( + echo "::error::Unexpected output in khiops scripts" + exit 1 + ) + - name: Check Khiops coclustering output [UNIXes] + if: runner.os != 'Windows' shell: bash run: |- khiops_coclustering -s @@ -42,14 +128,26 @@ runs: echo "::error::Unexpected output in khiops_coclustering scripts" false fi - - name: Check process number + - name: Check Khiops coclustering output [Windows] + if: runner.os == 'Windows' + shell: cmd /C call {0} + run: | + khiops_coclustering -s + for /f %%i in (' 
khiops_coclustering -v ^| find /c /v "" ') do set "LINE_NUMBER=%%i" + if not "%LINE_NUMBER%" == "1" ( + echo "::error::Unexpected output in khiops_coclustering scripts" + exit 1 + ) + - name: Check process number [UNIXes] + # Test if Khiops has the right number of logical processes + # (it's obvious but you never know) + if: runner.os != 'Windows' shell: bash env: KHIOPS_PROC_NUMBER: 4 OMPI_MCA_rmaps_base_oversubscribe: true run: |- - PROC_NUMBER_LIST=($(khiops -s | grep 'Logical processes on system')) - PROC_NUMBER=${PROC_NUMBER_LIST[-1]} + PROC_NUMBER=$(khiops -s | grep "Logical processes" | awk '{print $NF}') if [ "$PROC_NUMBER" != "$KHIOPS_PROC_NUMBER" ] ; then echo "::error::Wrong proc number ($PROC_NUMBER vs $KHIOPS_PROC_NUMBER)" diff --git a/.github/actions/test-khiops-on-iris/action.yml b/.github/actions/test-khiops-on-iris/action.yml index 1c3455bf2..f1578fa2b 100644 --- a/.github/actions/test-khiops-on-iris/action.yml +++ b/.github/actions/test-khiops-on-iris/action.yml @@ -1,21 +1,29 @@ --- name: Test Khiops on Iris description: Tests a Khiops installation on the Iris dataset +inputs: + os-decription: + description: OS description (os +arch) used to name artifacts (error reports) + required: true runs: using: composite steps: - # Python install: we don't use the setup-python action because of the following error: - # python3: /lib64/libc.so.6: version `GLIBC_2.34' not found (required by python3) - name: Python setup for Linux if: runner.os == 'Linux' shell: bash run: | - # Detect Debian based OS - if [ -d "/etc/apt" ] + # Install Python 3 if not present (already present in conda environment) + if ! 
command -v python3 &> /dev/null then - apt-get install -y python3 > /dev/null - else - yum install -y python3.11 + # Python install: we don't use the setup-python action because of the following error: + # python3: /lib64/libc.so.6: version `GLIBC_2.34' not found (required by python3) + # Detect Debian based OS + if [ -d "/etc/apt" ] + then + apt-get install -y python3 > /dev/null + else + yum install -y python3.11 + fi fi echo "PYTHON=python3" >> "$GITHUB_ENV" - name: Python setup for Windows or macOS @@ -39,19 +47,12 @@ runs: echo "KHIOPS_SCRIPT=khiops" >> "$GITHUB_ENV" echo "KHIOPS_CC_SCRIPT=khiops_coclustering" >> "$GITHUB_ENV" fi - - name: Check Khiops installation - shell: bash - env: - KHIOPS_PROC_NUMBER: 4 - OMPI_MCA_rmaps_base_oversubscribe: true - run: | - "$KHIOPS_SCRIPT" -s - "$KHIOPS_CC_SCRIPT" -s - name: Run Khiops tests shell: bash env: KHIOPS_PROC_NUMBER: 4 OMPI_MCA_rmaps_base_oversubscribe: true + KHIOPS_MPI_VERBOSE: true run: | cd test/LearningTest/TestKhiops/Standard/Iris/ "$KHIOPS_SCRIPT" -b -i test.prm -e results/err.txt @@ -73,7 +74,17 @@ runs: $PYTHON kht_test.py ../../LearningTest/TestCoclustering/Standard/Iris check $PYTHON kht_apply.py ../../LearningTest/TestKhiops/Standard/Iris errors | tee /tmp/khiops-log.txt $PYTHON kht_apply.py ../../LearningTest/TestCoclustering/Standard/Iris errors | tee /tmp/coclustering-log.txt + $PYTHON kht_collect_results.py ../../LearningTest/TestKhiops/Standard/Iris $GITHUB_WORKSPACE/results/khiops --collect-type warnings + $PYTHON kht_collect_results.py ../../LearningTest/TestCoclustering/Standard/Iris $GITHUB_WORKSPACE/results/coclustering --collect-type warnings if (grep -q error /tmp/khiops-log.txt || grep -q error /tmp/coclustering-log.txt); then echo "::error::Errors in Khiops run" false fi + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: test-results-${{ inputs.os-decription}} + retention-days: 7 + path: |- + results/ diff --git 
a/.github/workflows/build-linux-pack-containers.yml b/.github/workflows/build-linux-pack-containers.yml index c1913e216..c25bf98ff 100644 --- a/.github/workflows/build-linux-pack-containers.yml +++ b/.github/workflows/build-linux-pack-containers.yml @@ -17,6 +17,7 @@ jobs: - ubuntu18.04 - ubuntu20.04 - ubuntu22.04 + - ubuntu24.04 permissions: packages: write # Allows writing in the container registry steps: diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 388b78d67..a08a6386b 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -31,7 +31,7 @@ jobs: # Linux: We use the Khiops dev debian:10 container to build with glibc 2.28 # It ensures compatibility with glibc >= 2.28 (in particular Rocky 8) - {os: ubuntu-22.04, os-family: linux-64, json-image: '{"image": "ghcr.io/khiopsml/khiops/khiopsdev-debian10:latest"}'} - - {os: windows-2019, os-family: win-64, json-image: '{"image": null}'} + - {os: windows-2022, os-family: win-64, json-image: '{"image": null}'} - {os: macos-13, os-family: osx-64, json-image: '{"image": null}'} - {os: macos-14, os-family: osx-arm64, json-image: '{"image": null}'} container: ${{ fromJSON(matrix.setup.json-image) }} @@ -39,13 +39,16 @@ jobs: steps: - name: Checkout Sources uses: actions/checkout@v4 + - name: Check the tag consistency with the source version + if: github.ref_type == 'tag' + uses: ./.github/actions/check-tag-version - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: latest + miniforge-version: latest python-version: '3.12' - name: Install Dependency Requirements for Building Conda Packages - run: conda install conda-build conda-verify + run: conda install -y conda-build # We need MacOS SDK 10.10 to build for macOS Intel # See: https://docs.conda.io/projects/conda-build/en/3.21.x/resources/compiler-tools.html#macos-sdk - name: Install Mac OS SDK 10.10 @@ -57,15 +60,13 @@ jobs: shell: bash run: | # Put the Khiops package version - # The 
conda version cannot have '-' as a character so we eliminate it if [[ "${{ github.ref_type }}" == "tag" ]] then - KHIOPS_RAW_VERSION="${{ github.ref_name }}" + KHIOPS_VERSION="${{ github.ref_name }}" else - KHIOPS_RAW_VERSION="$(./scripts/khiops-version)" + KHIOPS_VERSION="$(./scripts/khiops-version)" fi - echo "KHIOPS_VERSION=$(echo $KHIOPS_RAW_VERSION | sed 's/-//')" >> "$GITHUB_ENV" - + echo "KHIOPS_VERSION=$KHIOPS_VERSION" >> "$GITHUB_ENV" # On tag and macOS: Set the environment variables to sign the binaries if [[ "${{ runner.os }}" == "macOS" && "${{ github.ref_type }}" == "tag" ]] then @@ -74,13 +75,9 @@ jobs: echo "KHIOPS_APPLE_CERTIFICATE_PASSWORD=${{ secrets.KHIOPS_APPLE_CERTIFICATE_PASSWORD }}" >> "$GITHUB_ENV" echo "KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD=${{ secrets.KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD }}" >> "$GITHUB_ENV" fi - - name: Build conda packages (Windows) - if: runner.os == 'Windows' - run: conda build --output-folder ./build/conda ./packaging/conda - # In Linux/macOS we need the conda-forge channel to install their pinned versions - - name: Build conda packages (Linux/macOS) - if: runner.os != 'Windows' - run: conda build --channel conda-forge --output-folder ./build/conda ./packaging/conda + - name: Build conda packages + run: | + conda build --output-folder ./build/conda ./packaging/conda - name: Upload conda packages artifact uses: actions/upload-artifact@v4 with: @@ -96,6 +93,7 @@ jobs: env: - {os: ubuntu-20.04, os-family: linux-64} - {os: ubuntu-22.04, os-family: linux-64} + - {os: ubuntu-24.04, os-family: linux-64} - {os: windows-2019, os-family: win-64} - {os: windows-2022, os-family: win-64} - {os: macos-12, os-family: osx-64} @@ -106,24 +104,31 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: latest # needed for macOS 13 + miniforge-version: latest # needed for macOS 13 python-version: ${{ matrix.python-version }} - name: Download Conda Package Artifact uses: 
actions/download-artifact@v4 with: name: khiops-conda-${{ matrix.env.os-family }} path: ./build/conda - - name: Install the Conda package (Windows) - if: runner.os == 'Windows' - run: conda install --channel ./build/conda khiops-core - # In Linux/macOS we need the conda-forge channel to install their pinned versions - - name: Install the Conda package (Linux/macOS) - if: runner.os != 'Windows' - run: conda install --channel conda-forge --channel ./build/conda khiops-core + - name: Install the Conda package + run: | + conda install --channel ./build/conda khiops-core - name: Test that the executables are installed run: | - MODL -v - MODL_Coclustering -v + conda install --channel conda-forge --channel ./build/conda khiops-core + - name: Add CONDA_PREFIX to shared PATH + run: | + echo "$CONDA_PREFIX/bin" >> $GITHUB_PATH + - name: Checkout sources + uses: actions/checkout@v4 + - name: Test that the executables are installed + uses: ./.github/actions/test-khiops-install + - name: Test that khiops on Iris dataset + uses: ./.github/actions/test-khiops-on-iris + with: + os-decription: ${{ matrix.env.os }}-${{ matrix.env.os-family }} + # Release is only executed on tags # Note: For this job to work the secrets variables KHIOPS_ANACONDA_CHANNEL_TOKEN and # KHIOPS_DEV_ANACONDA_CHANNEL_TOKEN must be set with valid anaconda.org access tokens @@ -144,10 +149,11 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3 with: - miniconda-version: latest + miniforge-version: latest python-version: '3.12' - name: Install requirement packages - run: conda install -y anaconda-client conda-index + run: | + conda install -y anaconda-client conda-index - name: Reindex the package directory run: python -m conda_index ./build/conda - name: Upload the packages to anaconda.org @@ -155,9 +161,10 @@ jobs: # Set the anaconda.org channel ANACONDA_CHANNEL="${{ inputs.release-channel || 'khiops-dev' }}" - # For the release channel: upload without forcing + # For the release 
channel: upload without forcing; do not upload kni-transfer if [[ "$ANACONDA_CHANNEL" == "khiops" ]] then + rm -f -v ./build/conda/*/kni-transfer*.tar.bz2 anaconda --token "${{ secrets.KHIOPS_ANACONDA_CHANNEL_TOKEN }}" upload \ --user "$ANACONDA_CHANNEL" ./build/conda/*/*.tar.bz2 # For the dev channel: upload with forcing @@ -189,7 +196,7 @@ jobs: with: allowUpdates: true body: | - **This release is for testing purporses only and there is no support for it.** + **This release is for testing purposes only and there is no support for it.** **Go to https://khiops.org to install the latest supported version.** draft: false makeLatest: false diff --git a/.github/workflows/pack-debian.yml b/.github/workflows/pack-debian.yml index ccd544d73..e8dafaed0 100644 --- a/.github/workflows/pack-debian.yml +++ b/.github/workflows/pack-debian.yml @@ -7,7 +7,7 @@ on: description: mpi implementation type: choice default: openmpi - options: + options: - mpich - openmpi pull_request: @@ -30,19 +30,22 @@ jobs: strategy: fail-fast: false matrix: - os: [debian10, debian11, debian12, ubuntu22.04, ubuntu20.04] + os: [debian10, debian11, debian12, ubuntu20.04, ubuntu22.04, ubuntu24.04] container: image: ghcr.io/khiopsml/khiops/khiopsdev-${{ matrix.os }}:latest steps: - name: Checkout sources uses: actions/checkout@v4 + - name: Check the tag consistency with the source version + if: github.ref_type == 'tag' + uses: ./.github/actions/check-tag-version - name: Set environment variables run: | source /etc/os-release echo "ID=$ID" >> "$GITHUB_ENV" echo "VERSION_CODENAME=$VERSION_CODENAME" >> "$GITHUB_ENV" echo "MPI_IMPLEMENTATION=${{ inputs.mpi_implementation || 'openmpi' }}" >> "$GITHUB_ENV" - + - name: Configure CMake run: | update-alternatives --set mpi /usr/bin/mpicc.$MPI_IMPLEMENTATION @@ -51,7 +54,7 @@ jobs: - name: Build Khiops binaries run: | cmake --build --preset linux-gcc-release --parallel \ - --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer norm_jar khiops_jar + 
--target MODL MODL_Coclustering _khiopsgetprocnumber KhiopsNativeInterface KNITransfer norm_jar khiops_jar - name: Build package with CPack run: cd build/linux-gcc-release && cpack -G DEB - name: Rename the packages to include the ubuntu codename @@ -77,13 +80,15 @@ jobs: test: needs: build runs-on: ubuntu-latest + env: + DEBIAN_FRONTEND: noninteractive defaults: run: shell: bash strategy: fail-fast: false matrix: - os: ['debian:10', 'debian:11', 'debian:12', 'ubuntu:22.04', 'ubuntu:20.04'] + os: ['debian:10', 'debian:11', 'debian:12', 'ubuntu:20.04', 'ubuntu:22.04', 'ubuntu:24.04'] container: image: ${{ matrix.os }} steps: @@ -103,7 +108,6 @@ jobs: path: artifacts - name: Install Khiops core run: | - export DEBIAN_FRONTEND="noninteractive" apt-get update dpkg -i ./artifacts/khiops-core* || true apt-get -f install -y @@ -117,16 +121,20 @@ jobs: uses: ./.github/actions/test-khiops-install - name: Test Khiops on Iris dataset uses: ./.github/actions/test-khiops-on-iris + with: + os-decription: ${{ env.ID }}-${{ env.VERSION_CODENAME }} test-kni: needs: build runs-on: ubuntu-latest + env: + DEBIAN_FRONTEND: noninteractive defaults: run: shell: bash strategy: fail-fast: false matrix: - os: [debian10, debian11, debian12, ubuntu22.04, ubuntu20.04] + os: [debian10, debian11, debian12, ubuntu20.04, ubuntu22.04, ubuntu24.04] container: image: ghcr.io/khiopsml/khiops/khiopsdev-${{ matrix.os }}:latest steps: @@ -160,8 +168,6 @@ jobs: contents: write runs-on: ubuntu-latest steps: - - name: Checkout sources - uses: actions/checkout@v4 - name: Download package artifacts uses: actions/download-artifact@v4 with: @@ -187,6 +193,9 @@ jobs: with: allowUpdates: true artifacts: '*.deb' + body: | + **This release is for testing purposes only and there is no support for it.** + **Go to https://khiops.org to install the latest supported version.** draft: false makeLatest: false prerelease: true diff --git a/.github/workflows/pack-macos.yml b/.github/workflows/pack-macos.yml index 
be98fee80..c81b035f2 100644 --- a/.github/workflows/pack-macos.yml +++ b/.github/workflows/pack-macos.yml @@ -14,7 +14,7 @@ jobs: uses: ./.github/actions/build-khiops with: preset-name: ${{env.PRESET_NAME}} - targets: MODL MODL_Coclustering KhiopsNativeInterface + targets: MODL MODL_Coclustering KhiopsNativeInterface _khiopsgetprocnumber override-flags: -DTESTING=OFF - name: Build package with CPack run: cd build/${{env.PRESET_NAME}} && cpack -G ZIP diff --git a/.github/workflows/pack-nsis.yml b/.github/workflows/pack-nsis.yml index 10a3ba95e..80c0008fb 100644 --- a/.github/workflows/pack-nsis.yml +++ b/.github/workflows/pack-nsis.yml @@ -21,28 +21,16 @@ jobs: khiops-version: ${{ steps.get-version.outputs.khiops-version }} runs-on: windows-latest steps: + - name: Checkout sources + uses: actions/checkout@v4 + - name: Check the tag consistency with the source version + if: github.ref_type == 'tag' + uses: ./.github/actions/check-tag-version - name: Install Java Temurin uses: actions/setup-java@v4 with: distribution: temurin java-version: '21' - - name: Obtain checkout ref - shell: bash - run: | - # We take the "pull request head" ref (by default it is a merged one) - if [[ "${{ github.event_name }}" == "pull_request" ]] - then - CHECKOUT_REF="${{ github.event.pull_request.head.sha }}" - # Otherwise the default checkout action ref - else - CHECKOUT_REF="${{ github.ref_name }}" - fi - echo "CHECKOUT_REF=$CHECKOUT_REF" >> $GITHUB_ENV - echo "Checkout ref: $CHECKOUT_REF" - - name: Checkout sources - uses: actions/checkout@v4 - with: - ref: ${{ env.CHECKOUT_REF }} - name: Put the package version on the environment and output id: get-version shell: bash @@ -69,7 +57,7 @@ jobs: with: preset-name: windows-msvc-release targets: MODL MODL_Coclustering norm_jar khiops_jar KhiopsNativeInterface - KNITransfer + KNITransfer _khiopsgetprocnumber override-flags: -DTESTING=OFF -DBUILD_JARS=ON - name: Build NSIS package shell: pwsh @@ -90,7 +78,7 @@ jobs: - name: Build ZIP packages 
shell: bash run: |- - cd build/windows-msvc-release && CPACK -G ZIP + cd build/windows-msvc-release && CPACK -G ZIP - name: Upload the Installer Artifact uses: actions/upload-artifact@v4 with: @@ -129,7 +117,11 @@ jobs: - name: Checkout the khiops sources uses: actions/checkout@v4 - name: Test the installation + uses: ./.github/actions/test-khiops-install + - name: Test Khiops on Iris dataset uses: ./.github/actions/test-khiops-on-iris + with: + os-decription: windows test-kni: needs: build runs-on: windows-2022 @@ -146,7 +138,6 @@ jobs: 7z x KNI-${{ needs.build.outputs.khiops-version }}.zip -o${GITHUB_WORKSPACE}/KNI - name: Test KNI zip uses: ./.github/actions/test-kni - # Release is only executed on tags release: if: github.ref_type == 'tag' @@ -180,7 +171,9 @@ jobs: with: allowUpdates: true artifacts: khiops-*-setup.exe,KNI-*.zip,kni-transfer-*.zip - body: '**For testing purposes only**' + body: | + **This release is for testing purposes only and there is no support for it.** + **Go to https://khiops.org to install the latest supported version.** draft: false makeLatest: false prerelease: true diff --git a/.github/workflows/pack-rpm.yml b/.github/workflows/pack-rpm.yml index 93d33de10..2c9563055 100644 --- a/.github/workflows/pack-rpm.yml +++ b/.github/workflows/pack-rpm.yml @@ -32,6 +32,9 @@ jobs: steps: - name: Checkout sources uses: actions/checkout@v4 + - name: Check the tag consistency with the source version + if: github.ref_type == 'tag' + uses: ./.github/actions/check-tag-version - name: Put OS info on the environment run: | source /etc/os-release @@ -48,7 +51,7 @@ jobs: - name: Build Khiops binaries run: | cmake --build --preset linux-gcc-release --parallel \ - --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer norm_jar khiops_jar + --target MODL MODL_Coclustering _khiopsgetprocnumber KhiopsNativeInterface KNITransfer norm_jar khiops_jar - name: Build package with CPack run: cd build/linux-gcc-release && cpack -G RPM - name: Rename 
packages with codename @@ -112,6 +115,8 @@ jobs: uses: ./.github/actions/test-khiops-install - name: Test Khiops on Iris dataset uses: ./.github/actions/test-khiops-on-iris + with: + os-decription: rockylinux-${{ matrix.rocky_version }} test-kni: needs: build runs-on: ubuntu-latest @@ -155,8 +160,6 @@ jobs: contents: write runs-on: ubuntu-latest steps: - - name: Checkout sources - uses: actions/checkout@v4 - name: Download package artifacts uses: actions/download-artifact@v4 with: @@ -182,7 +185,9 @@ jobs: with: allowUpdates: true artifacts: '*.rpm' - body: '**For testing purposes only**' + body: | + **This release is for testing purposes only and there is no support for it.** + **Go to https://khiops.org to install the latest supported version.** draft: false makeLatest: false prerelease: true diff --git a/.github/workflows/run-standard-tests.yml b/.github/workflows/run-standard-tests.yml index e8ddc44b8..6425fda79 100644 --- a/.github/workflows/run-standard-tests.yml +++ b/.github/workflows/run-standard-tests.yml @@ -159,16 +159,16 @@ jobs: shell: bash run: | if [[ "${{ matrix.running-mode }}" == "parallel" ]] ; then - export KhiopsMPIProcessNumber=4 + export PARALLEL_ARG="-p 4" fi if [[ "${{ matrix.config }}" == "release" ]] ; then - python $TEST_PY test/LearningTest/TestKhiops/Standard ${BIN_PATH} + python $TEST_PY test/LearningTest/TestKhiops/Standard ${BIN_PATH} ${PARALLEL_ARG} if [[ "${{ matrix.running-mode }}" != "parallel" ]] ; then python $TEST_PY test/LearningTest/TestCoclustering/Standard ${BIN_PATH} python $TEST_PY test/LearningTest/TestKNI/Standard ${BIN_PATH} fi else - python $TEST_PY test/LearningTest/TestKhiops/Standard/IrisLight ${BIN_PATH} + python $TEST_PY test/LearningTest/TestKhiops/Standard/IrisLight ${BIN_PATH} ${PARALLEL_ARG} if [[ "${{ matrix.running-mode }}" != "parallel" ]] ; then python $TEST_PY test/LearningTest/TestCoclustering/Standard/Iris ${BIN_PATH} python $TEST_PY test/LearningTest/TestKNI/Standard/Iris ${BIN_PATH} diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 72db48212..8504126b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: hooks: - id: cmake-format - repo: https://github.com/lyz-code/yamlfix/ - rev: 1.16.0 + rev: 1.17.0 hooks: - id: yamlfix # Exclude conda's meta.yaml because @@ -43,4 +43,4 @@ repos: name: update-copyright entry: python scripts/update-copyright.py language: system - types_or: [c, c++, java] + types_or: [c, c++, java, python] diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d9540cc1..df92953ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,10 +72,24 @@ if(EXISTS "/etc/os-release") endif() endif() +# Check if it is conda environment +set(IS_CONDA false) +if(DEFINED ENV{CONDA_PREFIX}) + set(IS_CONDA true) + message(STATUS "Auto-detected conda environment") +endif() + +if(IS_CONDA AND MPI) + if(NOT DEFINED ENV{mpi}) + message( + WARNING "You are building in a conda environment without using `conda build`. find_mpi may not work as expected.") + endif() +endif() + # On Fedora distros, the binaries compiled with mpi must be located under the mpi location furthermore they have to be # suffixed by the name of the mpi implementation. 
These 2 items are given by MPI_BIN and MPI_SUFFIX when the module # environment is loaded source /etc/profile.d/modules.sh module load mpi/mpich-x86_64 -if(IS_FEDORA_LIKE) +if(IS_FEDORA_LIKE AND NOT IS_CONDA) if(DEFINED ENV{MPI_BIN}) set(MPI_BIN $ENV{MPI_BIN}) else() @@ -92,7 +106,7 @@ if(IS_FEDORA_LIKE) "MPI_SUFFIX is not defined, you have to load the mpi module e.g.:\n source /etc/profile.d/modules.sh && module load mpi/mpich-x86_64" ) endif(DEFINED ENV{MPI_SUFFIX}) -endif(IS_FEDORA_LIKE) +endif(IS_FEDORA_LIKE AND NOT IS_CONDA) # Set the location of the built artifacts: # @@ -111,7 +125,7 @@ message(STATUS "Libraries will be stored in ${CMAKE_BINARY_DIR}/lib/") set(TMP_DIR ${PROJECT_BINARY_DIR}/tmp) # Set module path for the project -set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/packaging") +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/packaging" ; "${CMAKE_CURRENT_SOURCE_DIR}/scripts") set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) @@ -126,31 +140,14 @@ if(MPI) find_package(MPI 2.0 REQUIRED) message( STATUS - "MPI command line: ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} EXECUTABLE ${MPIEXEC_POSTFLAGS} ARGS" + "MPI command line: ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} EXECUTABLE ${MPIEXEC_POSTFLAGS} ARGS" ) endif() # Detects the mpi implementation if(UNIX AND MPI) - string(FIND "${MPI_LIBRARIES}" openmpi POS) - if(POS GREATER -1) - set(MPI_IMPL "openmpi") - endif() - - string(FIND "${MPI_LIBRARIES}" mpich POS) - if(POS GREATER -1) - set(MPI_IMPL "mpich") - endif() - - string(FIND "${MPI_LIBRARIES}" intel POS) - if(POS GREATER -1) - set(MPI_IMPL "intel") - endif() - if(MPI_IMPL) - message(STATUS "Auto-detected MPI implementation: ${MPI_IMPL}") - else() - message(STATUS "Unable to detect the MPI implementation: no suffix will be added to binaries name") - endif() + include(get_mpi_implementation) + get_mpi_implementation() # Set MPI 
suffix if it is not defined with the environment variables (like on Fedora distros) if(NOT MPI_SUFFIX AND MPI_IMPL) @@ -273,6 +270,10 @@ function(set_unix_khiops_options target) -Wno-inconsistent-missing-override) endif() + # Disable "floating-point expression contraction" on clang and gcc to improve reproducibility between x86_64 and + arm64 + target_compile_options(${target} PRIVATE -ffp-contract=off) + endfunction(set_unix_khiops_options) # Sets the compiling options for MSVC diff --git a/packaging/common/khiops/README.txt b/packaging/common/khiops/README.txt index c63413ebf..a02f69a05 100644 --- a/packaging/common/khiops/README.txt +++ b/packaging/common/khiops/README.txt @@ -69,13 +69,13 @@ Configuration: - macOS (via conda, see https://khiops.org), intel and ARM, 64 bits Windows software: - - Microsoft MPI 10.1.3 + - Microsoft MPI 10.1.3 for native installations - Automatic detection and silent installation during the Khiops installation process on Windows - For a silent installation, run the installer with /S option, and /D=installationDir to choose a specific installation directory. Linux software: - - Java Runtime Environment V7 or higher, mpich (>3.0), libstdc++6 + - Java Runtime Environment V7 or higher, OpenMPI (> 4.0) for native installations, libstdc++6 - Automatic detection and silent installation during the Khiops installation process diff --git a/packaging/common/khiops/WHATSNEW.txt b/packaging/common/khiops/WHATSNEW.txt index 74239df23..e68cfd92b 100644 --- a/packaging/common/khiops/WHATSNEW.txt +++ b/packaging/common/khiops/WHATSNEW.txt @@ -2,6 +2,20 @@ Release Notes for Khiops 10 Version Series ========================================== +Version 10.2.3 +============== + +Khiops is now available for Ubuntu 24.04. + +The GUI now uses all available processor cores if instructed to do so. + +New version of visualization and covisualization tools, with improvements and bug fixes. 
+ +Internal improvements linked to packaging, in particular with the systematic use of the 'khiops_env' script. + +Other minor improvements and bug fixes. + + Version 10.2.2 ============== On Windows, the Khiops installation program now comes with the open-source Java JRE JustJ instead of Oracle's JDK. diff --git a/packaging/conda/README.md b/packaging/conda/README.md index a0e40c206..75e6a58db 100644 --- a/packaging/conda/README.md +++ b/packaging/conda/README.md @@ -1,18 +1,35 @@ # Khiops Conda Packaging Scripts ## How to Build -You'll need `conda-build` installed in your system. +We need `conda-build` installed in the system. + +We need to make sure that Conda is configured to use conda-forge as its default channel and that the vanilla default channel (defaults) is removed, e.g. by writing: + +```bash +$ conda config --add channels conda-forge +$ conda config --remove channels defaults +``` + +Or if we want to keep the vanilla defaults channel, we could give the priority to conda-forge: + +```bash +$ conda config --add channels conda-forge +$ conda config --set channel_priority strict +``` + +Thus, the user's $HOME/.condarc file would be updated accordingly and --channel conda-forge would no longer be needed. + +In the CI this is ensured by the usage of miniforge-version in the invocation of the setup-miniconda action. + +To build `khiops-core` package, we need to run these commands at the root of the repo (it will leave a ready to use conda channel in `./khiops-conda`): ```bash # At the root of the repo -# These commands will leave a ready to use conda channel in `./khiops-conda` -# Windows -conda build --output-folder ./khiops-conda packaging/conda +# khiops version must be set before launching the build (can be retrieved with the script scripts/khiops-version). 
+export KHIOPS_VERSION=$(scripts/khiops-version) -# Linux/macOS -# Note: We need the conda-forge channel to obtain the pinned versions of MPICH -conda build --channel conda-forge --output-folder ./khiops-conda packaging/conda +conda build --output-folder ./khiops-conda packaging/conda ``` ### Signing the Executables in macOS @@ -32,4 +49,4 @@ following environment variables: If the process is executed as root (eg. Github Runner) then there is no need to input a password to access the keychain. Otherwise you'll be prompt with it. -For more details see the comments in the signing section of `build.sh`. +For more details see the comments in the signing section of `build.sh`. \ No newline at end of file diff --git a/packaging/conda/bld.bat b/packaging/conda/bld.bat index a9d8a90df..24ddfb3f5 100644 --- a/packaging/conda/bld.bat +++ b/packaging/conda/bld.bat @@ -2,14 +2,31 @@ REM Echo all output @echo on REM Build the Khiops binaries -cmake --preset windows-msvc-release -DBUILD_JARS=OFF -DTESTING=OFF -cmake --build --preset windows-msvc-release --parallel --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer +REM Specify empty target platform and generator toolset for CMake with Ninja on +REM Windows +REM Ninja does not expect target platform and generator toolset. +REM However, CMake Windows presets set these, which results in Ninja failure. +cmake --preset windows-msvc-release -DBUILD_JARS=OFF -DTESTING=OFF -A "" -T "" +cmake --build --preset windows-msvc-release --parallel --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer _khiopsgetprocnumber -REM Copy the MODL binaries to the Conda PREFIX path mkdir %PREFIX%\bin + +REM Copy the khiops-core binaries to the Conda PREFIX path: MODL, MODL_Coclustering and _khiopsgetprocnumber. 
+REM This last one is used by khiops_env to get the physical cores number copy build\windows-msvc-release\bin\MODL.exe %PREFIX%\bin copy build\windows-msvc-release\bin\MODL_Coclustering.exe %PREFIX%\bin +copy build\windows-msvc-release\bin\_khiopsgetprocnumber.exe %PREFIX%\bin + +REM Copy the KNITransfer for the kni-transfer package (a test package for kni) copy build\windows-msvc-release\bin\KNITransfer.exe %PREFIX%\bin -copy build\windows-msvc-release\bin\KhiopsNativeInterface.dll %PREFIX%\lib + +REM Copy the KhiopsNativeInterface libs for the kni package +copy build\windows-msvc-release\bin\KhiopsNativeInterface.dll %PREFIX%\bin +copy build\windows-msvc-release\lib\KhiopsNativeInterface.lib %PREFIX%\lib + +REM Copy the scripts to the Conda PREFIX path +copy build\windows-msvc-release\tmp\khiops_env.cmd %PREFIX%\bin +copy packaging\windows\khiops_coclustering.cmd %PREFIX%\bin +copy packaging\windows\khiops.cmd %PREFIX%\bin if errorlevel 1 exit 1 diff --git a/packaging/conda/build.sh b/packaging/conda/build.sh index a9c943354..c430f9570 100644 --- a/packaging/conda/build.sh +++ b/packaging/conda/build.sh @@ -3,25 +3,23 @@ # Set-up the shell to behave more like a general-purpose programming language set -euo pipefail -# Choose the build preset for macOS/Linux -if [[ "$(uname)" == "Linux" ]] -then - CMAKE_PRESET="linux-gcc-release" -else - CMAKE_PRESET="macos-clang-release" -fi - # Configure project -cmake --fresh --preset $CMAKE_PRESET -DBUILD_JARS=OFF -DTESTING=OFF +cmake -B build/conda -S . 
-D BUILD_JARS=OFF -D TESTING=OFF -D CMAKE_BUILD_TYPE=Release -G Ninja # Build MODL and MODL_Coclustering -cmake --build --preset $CMAKE_PRESET --parallel \ - --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer +cmake --build build/conda --parallel \ + --target MODL MODL_Coclustering KhiopsNativeInterface KNITransfer _khiopsgetprocnumber # Move the binaries to the Conda PREFIX path -mv ./build/$CMAKE_PRESET/bin/MODL* "$PREFIX/bin" -mv ./build/$CMAKE_PRESET/bin/KNITransfer* "$PREFIX/bin" -mv ./build/$CMAKE_PRESET/lib/libKhiopsNativeInterface* "$PREFIX/lib" +mv ./build/conda/bin/MODL* "$PREFIX/bin" +mv ./build/conda/bin/_khiopsgetprocnumber* "$PREFIX/bin" +mv ./build/conda/bin/KNITransfer* "$PREFIX/bin" +mv ./build/conda/lib/libKhiopsNativeInterface* "$PREFIX/lib" + +# Copy the scripts to the Conda PREFIX path +cp ./build/conda/tmp/khiops_env "$PREFIX/bin" +cp ./packaging/linux/common/khiops "$PREFIX/bin" +cp ./packaging/linux/common/khiops_coclustering "$PREFIX/bin" # Custom rpath relocation and signing executables for macOS in arm64 # @@ -50,8 +48,7 @@ mv ./build/$CMAKE_PRESET/lib/libKhiopsNativeInterface* "$PREFIX/lib" # - KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD: A temporary password for the a short-lived keychain # cd .. 
-if [[ "$(uname)" == "Darwin" && -n "${KHIOPS_APPLE_CERTIFICATE_COMMON_NAME-}" ]] -then +if [[ "$(uname)" == "Darwin" && -n "${KHIOPS_APPLE_CERTIFICATE_COMMON_NAME-}" ]]; then # Inform about the signature process echo "Signing binaries with the certificate named '${KHIOPS_APPLE_CERTIFICATE_COMMON_NAME}'" @@ -60,13 +57,14 @@ then install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL" install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL" install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL_Coclustering" + install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/_khiopsgetprocnumber" # Add the relative rpath as conda build would install_name_tool -add_rpath "@loader_path/../lib" "$PREFIX/bin/MODL" install_name_tool -add_rpath "@loader_path/../lib" "$PREFIX/bin/MODL_Coclustering" + install_name_tool -add_rpath "@loader_path/../lib" "$PREFIX/bin/_khiopsgetprocnumber" - if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]] - then + if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]]; then # Keychain setup slightly modified from: https://stackoverflow.com/a/68577995 # Before importing identity # - Set the default user login keychain @@ -82,8 +80,7 @@ then sudo security unlock-keychain -p "$KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD" kh-tmp.keychain # Add identity (certificate + private key) to the temporary keychain - echo "$KHIOPS_APPLE_CERTIFICATE_BASE64" \ - | base64 --decode -i - -o kh-cert.p12 + echo "$KHIOPS_APPLE_CERTIFICATE_BASE64" | base64 --decode -i - -o kh-cert.p12 sudo security import kh-cert.p12 \ -k kh-tmp.keychain \ -P "$KHIOPS_APPLE_CERTIFICATE_PASSWORD" \ @@ -109,14 +106,15 @@ then # Sign the executables and check $CODESIGN --force --sign "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$PREFIX/bin/MODL" $CODESIGN --force --sign "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$PREFIX/bin/MODL_Coclustering" + $CODESIGN --force --sign "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$PREFIX/bin/_khiopsgetprocnumber" $CODESIGN --force --sign 
"$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$KNI_PATH" $CODESIGN -d -vvv "$PREFIX/bin/MODL" $CODESIGN -d -vvv "$PREFIX/bin/MODL_Coclustering" + $CODESIGN -d -vvv "$PREFIX/bin/_khiopsgetprocnumber" $CODESIGN -d -vvv "$KNI_PATH" # Remove the temporary keychain and restore the login keychain as default if created - if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]] - then + if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]]; then sudo security delete-keychain kh-tmp.keychain sudo security list-keychains -d user -s login.keychain fi diff --git a/packaging/conda/conda_build_config.yaml b/packaging/conda/conda_build_config.yaml index 185e09f7c..6fa1b4cf6 100644 --- a/packaging/conda/conda_build_config.yaml +++ b/packaging/conda/conda_build_config.yaml @@ -16,12 +16,14 @@ mpi: c_compiler: - gcc # [linux] - clang # [osx] + - vs2022 # [win] c_compiler_version: - 12 # [linux] - 16 # [osx] cxx_compiler: - gxx # [linux] - clangxx # [osx] + - vs2022 # [win] cxx_compiler_version: - 12 # [linux] - 16 # [osx] diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 9fa38014e..0234c71b0 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -1,6 +1,8 @@ +{% set version = os.environ.get('KHIOPS_VERSION').replace("-", "") %} + package: name: khiops-binaries - version: {{ os.environ.get('KHIOPS_VERSION') }} + version: {{ version }} source: path: ../../ @@ -40,7 +42,7 @@ outputs: # khiops-core package - name: khiops-core - version: {{ os.environ.get('KHIOPS_VERSION') }} + version: {{ version }} requirements: build: - cmake @@ -52,18 +54,27 @@ outputs: - {{ mpi }} files: - bin/MODL* + - bin/_khiopsgetprocnumber* + - bin/khiops_env # [linux or osx] + - bin/khiops # [linux or osx] + - bin/khiops_coclustering # [linux or osx] + - bin/khiops_env.cmd # [win] + - bin/khiops.cmd # [win] + - bin/khiops_coclustering.cmd # [win] test: commands: - - MODL -s - - MODL_Coclustering -s + - khiops_env --env + - khiops -s + - khiops_coclustering -s # kni package (do not 
need khiops-core as a runtime dependency) - name: kni - version: {{ os.environ.get('KHIOPS_VERSION') }} + version: {{ version }} files: - lib/libKhiopsNativeInterface.so* # [linux] - lib/libKhiopsNativeInterface*.dylib # [osx] - - lib/KhiopsNativeInterface.dll # [win] + - bin/KhiopsNativeInterface.dll # [win] + - lib/KhiopsNativeInterface.lib # [win] requirements: build: - cmake @@ -72,7 +83,7 @@ outputs: # kni-transfer package (designed only to test kni) - name: kni-transfer - version: {{ os.environ.get('KHIOPS_VERSION') }} + version: {{ version }} requirements: build: - cmake diff --git a/packaging/dockerfiles/Dockerfile.ubuntu24.04 b/packaging/dockerfiles/Dockerfile.ubuntu24.04 new file mode 100644 index 000000000..85eb94e98 --- /dev/null +++ b/packaging/dockerfiles/Dockerfile.ubuntu24.04 @@ -0,0 +1,25 @@ +FROM ubuntu:24.04 +LABEL maintainer="khiops.team@orange.com" +LABEL description="Container with the dependencies to build and package Khiops" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN true \ + && apt-get -y update \ + && apt-get -y --no-install-recommends install \ + build-essential \ + cmake \ + coreutils \ + debhelper \ + openjdk-8-jdk \ + devscripts \ + fakeroot \ + libmpich-dev \ + openmpi-bin \ + libopenmpi-dev \ + nano \ + ninja-build \ + curl \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean \ + && true diff --git a/packaging/install.cmake b/packaging/install.cmake index f52859de5..42ee29ebe 100644 --- a/packaging/install.cmake +++ b/packaging/install.cmake @@ -67,60 +67,76 @@ endif() if(UNIX) - # replace MPIEXEC MPIEXEC_NUMPROC_FLAG and MPI_IMPL KHIOPS_MPI_EXTRA_FLAG ADDITIONAL_EN_VAR + # Set khiops and khiops_coclustering paths according to the environment (conda, fedora, etc) + if(IS_CONDA) + set(KHIOPS_PATH "$(get_script_dir)") + set(KHIOPS_COCLUSTERING_PATH "$(get_script_dir)") + set(GET_PROC_NUMBER_PATH "$(get_script_dir)") + set(IS_CONDA_VAR "\n# Inside conda environment\nexport _IS_CONDA=true") + set(SET_KHIOPS_DRIVERS_PATH "\n# Drivers 
search path\nexport KHIOPS_DRIVERS_PATH=$(dirname $(get_script_dir))/lib") + else() + if(IS_FEDORA_LIKE) + set(KHIOPS_PATH "${MPI_BIN}/khiops/") + else() + set(KHIOPS_PATH "/usr/bin/") + endif(IS_FEDORA_LIKE) + set(KHIOPS_COCLUSTERING_PATH "/usr/bin/") + set(GET_PROC_NUMBER_PATH "/usr/bin/") + + configure_file(${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_env/use_environment_module.sh.in + ${TMP_DIR}/use_environment_module.sh @ONLY NEWLINE_STYLE UNIX) + file(READ ${TMP_DIR}/use_environment_module.sh USE_ENVIRONMENT_MODULE) + file(READ ${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_env/java_settings.sh KHIOPS_JAVA_SETTINGS) + + endif(IS_CONDA) + + # replace MPIEXEC MPIEXEC_NUMPROC_FLAG and MPI_IMPL KHIOPS_MPI_EXTRA_FLAG ADDITIONAL_ENV_VAR if("${MPI_IMPL}" STREQUAL "openmpi") - set(KHIOPS_MPI_EXTRA_FLAG "--allow-run-as-root --quiet") - set(ADDITIONAL_EN_VAR "export OMPI_MCA_btl_vader_single_copy_mechanism=none # issue on docker") + set(KHIOPS_MPI_EXTRA_FLAG "--allow-run-as-root") + set(KHIOPS_MPI_QUIET "--quiet") + set(ADDITIONAL_ENV_VAR "export OMPI_MCA_btl_vader_single_copy_mechanism=none # issue on docker") + set(ADDITIONAL_ENV_VAR_DISPLAY + " echo OMPI_MCA_btl_vader_single_copy_mechanism \"$OMPI_MCA_btl_vader_single_copy_mechanism\"") if(IS_FEDORA_LIKE) - set(ADDITIONAL_EN_VAR "${ADDITIONAL_EN_VAR}\nexport PSM3_DEVICES=self # issue one rocky linux") + set(ADDITIONAL_ENV_VAR "${ADDITIONAL_ENV_VAR}\nexport PSM3_DEVICES=self # issue on rocky linux") + set(ADDITIONAL_ENV_VAR_DISPLAY "${ADDITIONAL_ENV_VAR_DISPLAY}\n echo PSM3_DEVICES \"$PSM3_DEVICES\"") endif() + set(MPIEXEC_HOSTFILE_FLAG "--hostfile") + file(READ ${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_env/export_env_variables.sh EXPORT_ENV_VARIABLES) + elseif("${MPI_IMPL}" STREQUAL "mpich") + set(MPIEXEC_HOSTFILE_FLAG "-f") + # Set localhost on MacOS (see issue # https://github.com/pmodels/mpich/issues/4710) + if(APPLE) + set(KHIOPS_MPI_EXTRA_FLAG "-host localhost") + endif(APPLE) endif() # 
Add header comment to the variable definition (if any variable is defined) - if(ADDITIONAL_EN_VAR) - set(ADDITIONAL_EN_VAR "# Additional variables for MPI\n${ADDITIONAL_EN_VAR}") + if(ADDITIONAL_ENV_VAR) + set(ADDITIONAL_ENV_VAR "\n# Additional variables for MPI\n${ADDITIONAL_ENV_VAR}") + set(ADDITIONAL_ENV_VAR_DISPLAY "\n # Additional variables for MPI\n${ADDITIONAL_ENV_VAR_DISPLAY}") endif() - configure_file(${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops-env.in ${TMP_DIR}/khiops-env @ONLY - NEWLINE_STYLE UNIX) - configure_file(${PROJECT_SOURCE_DIR}/packaging/linux/debian/khiops-core/postinst.in ${TMP_DIR}/postinst @ONLY - NEWLINE_STYLE UNIX) - - if(NOT IS_FEDORA_LIKE) - install(TARGETS MODL MODL_Coclustering RUNTIME DESTINATION usr/bin COMPONENT KHIOPS_CORE) - - # We install the binary with mpi suffix and create a symlink without the suffix + # Get the real file name of MODL e.g MODL_openmpi + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") get_target_property(MODL_NAME MODL OUTPUT_NAME) - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink /usr/bin/${MODL_NAME} ${TMP_DIR}/MODL) - install( - FILES ${TMP_DIR}/MODL - DESTINATION usr/bin - COMPONENT KHIOPS_CORE) else() + # the above line fails on macOS. But prefix is added to the binary name only on linux... + set(MODL_NAME "MODL") + endif() - # On fedora binaries built with mpi must follow these rules : - # - # - the binaries MUST be suffixed with $MPI_SUFFIX - # - MPI implementation specific files MUST be installed in the directories used by the MPI compiler e.g. 
$MPI_BIN - # - # see https://docs.fedoraproject.org/en-US/packaging-guidelines/MPI/ - # - install(TARGETS MODL RUNTIME DESTINATION ./${MPI_BIN}/khiops COMPONENT KHIOPS_CORE) - install(TARGETS MODL_Coclustering RUNTIME DESTINATION /usr/bin COMPONENT KHIOPS_CORE) - - # We install the binary under $MPI_BIN and create a symlink to it - get_target_property(MODL_NAME MODL OUTPUT_NAME) - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${MPI_BIN}/khiops/${MODL_NAME} ${TMP_DIR}/MODL) - install( - FILES ${TMP_DIR}/MODL - DESTINATION usr/bin - COMPONENT KHIOPS_CORE) + configure_file(${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_env/khiops_env.in ${TMP_DIR}/khiops_env @ONLY + NEWLINE_STYLE UNIX) + configure_file(${PROJECT_SOURCE_DIR}/packaging/linux/debian/khiops-core/postinst.in ${TMP_DIR}/postinst @ONLY + NEWLINE_STYLE UNIX) - endif() + install(TARGETS MODL RUNTIME DESTINATION ./${KHIOPS_PATH} COMPONENT KHIOPS_CORE) + install(TARGETS MODL_Coclustering RUNTIME DESTINATION ./${KHIOPS_COCLUSTERING_PATH} COMPONENT KHIOPS_CORE) + install(TARGETS _khiopsgetprocnumber RUNTIME DESTINATION ./${GET_PROC_NUMBER_PATH} COMPONENT KHIOPS_CORE) install( PROGRAMS ${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops - ${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_coclustering ${TMP_DIR}/khiops-env + ${PROJECT_SOURCE_DIR}/packaging/linux/common/khiops_coclustering ${TMP_DIR}/khiops_env DESTINATION usr/bin COMPONENT KHIOPS_CORE) @@ -151,4 +167,20 @@ if(UNIX) FILES ${CMAKE_BINARY_DIR}/jars/norm.jar ${CMAKE_BINARY_DIR}/jars/khiops.jar DESTINATION usr/share/khiops COMPONENT KHIOPS) + +else(UNIX) + + if(IS_CONDA) + set(GUI_STATUS "false") + set(SET_MPI "SET_MPI_CONDA") + set(IS_CONDA_VAR "REM Inside conda environment\r\nset \"_IS_CONDA=true\"") + set(SET_KHIOPS_DRIVERS_PATH "REM Drivers search path\r\nset \"KHIOPS_DRIVERS_PATH=%_KHIOPS_HOME%\\lib\"") + else() + set(SET_MPI "SET_MPI_SYSTEM_WIDE") + set(GUI_STATUS "true") + endif() + + 
configure_file(${PROJECT_SOURCE_DIR}/packaging/windows/khiops_env.cmd.in ${TMP_DIR}/khiops_env.cmd @ONLY + NEWLINE_STYLE CRLF) + endif(UNIX) diff --git a/packaging/linux/common/khiops b/packaging/linux/common/khiops index 82b1032fa..fc649783d 100644 --- a/packaging/linux/common/khiops +++ b/packaging/linux/common/khiops @@ -3,24 +3,98 @@ # Run Khiops tool # see the khiops_env script for full documentation on the environment variables used by Khiops -if [[ ! -f /usr/bin/khiops-env ]]; then - echo "/usr/bin/khiops-env is missing" +error() { + if [ -f /dev/log ]; then + logger "$*" --tag khiops --stderr + else + echo "$*" >&2 + fi + if [ -n "$DISPLAY" ] && command -v notify-send >/dev/null 2>&1; then + notify-send "Khiops" "$*" -i /usr/share/pixmaps/khiops.png -u normal + fi + unset _IS_CONDA exit 1 +} + +warning() { + if [ -f /dev/log ]; then + logger "$*" --tag khiops --stderr + else + echo "$*" >&2 + fi + if [ -n "$DISPLAY" ]; then + notify-send "Khiops" "$*" -i /usr/share/pixmaps/khiops.png -u normal + fi +} + +get_script_dir() { + SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + echo "$SCRIPT_DIR/" +} + +if [[ ! -f "$(get_script_dir)"/khiops_env ]]; then + echo "$(get_script_dir)/khiops_env is missing" + exit 1 +fi + +# Does not print messages from mpi +if [ -z "$KHIOPS_MPI_VERBOSE" ]; then + KHIOPS_MPI_VERBOSE="false" fi -# initialize Khiops env variables -source /usr/bin/khiops-env --env > /dev/null +# Initialize Khiops env variables +source $(get_script_dir)/khiops_env "" + +# Test if batch mode from parameters +KHIOPS_BATCH_MODE=false +for ((i = 1; i <= $#; i++)); do + if [ ${!i} = "-b" ] || [ ${!i} = "-h" ] || [ ${!i} = "-s" ] || [ ${!i} = "-v" ]; then + KHIOPS_BATCH_MODE=true + break + fi +done + +if [[ ! 
-e $KHIOPS_PATH ]]; then + error "Binary not found ($KHIOPS_PATH)" +fi + +# Display error with mpi configuration +if [ -n "$KHIOPS_MPI_ERROR" ]; then + warning "$KHIOPS_MPI_ERROR" +fi export CLASSPATH=$CLASSPATH:$KHIOPS_CLASSPATH export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$KHIOPS_JAVA_PATH -if [[ $# -eq 0 ]]; then - # run without parameters - # run and save scenario and log files in directory KHIOPS_LAST_RUN_DIR - $KHIOPS_MPI_COMMAND MODL -o "${KHIOPS_LAST_RUN_DIR}"/scenario._kh -e "${KHIOPS_LAST_RUN_DIR}"/log.txt +launch_khiops() { + if [[ $# -eq 0 ]]; then + # run without parameters + # run and save scenario and log files in directory KHIOPS_LAST_RUN_DIR + $KHIOPS_MPI_COMMAND "$KHIOPS_PATH" -o "${KHIOPS_LAST_RUN_DIR}"/scenario._kh -e "${KHIOPS_LAST_RUN_DIR}"/log.txt + else + # run with parameters + $KHIOPS_MPI_COMMAND "$KHIOPS_PATH" "$@" + fi +} + +# Launch Khiops +if [ "$KHIOPS_BATCH_MODE" = true ]; then + launch_khiops "$@" else - # run with parameters - $KHIOPS_MPI_COMMAND MODL "$@" + if [ "$_IS_CONDA" = true ] || [ -z "$DISPLAY" ]; then + error "GUI is not available, please use the '-b' flag" + else + if [ -z "$KHIOPS_JAVA_PATH" ]; then + if [ -z "$KHIOPS_JAVA_ERROR" ]; then + error "GUI is not available, please either use the '-b' flag, or install the 'khiops' native package for your host operating system." + else + error "$KHIOPS_JAVA_ERROR" + fi + else + launch_khiops "$@" + fi + fi fi -exit $? \ No newline at end of file +unset _IS_CONDA +exit $? diff --git a/packaging/linux/common/khiops-env.in b/packaging/linux/common/khiops-env.in deleted file mode 100644 index 1dd6586d1..000000000 --- a/packaging/linux/common/khiops-env.in +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash - -help() -{ - echo "Usage: khiops-env [-h] [--env]" - echo "khiops-env is an internal script intended to be used by Khiops tool and Khiops'" - echo "wrappers only." 
- echo "If the --env flag is used, the environment list is printed in the standard output" - echo - echo "The following variables are used to set the path and classpath" - echo "for the prerequisite of Khiops." - echo - echo "KHIOPS_PATH: path of Khiops executable, to add in path" - echo "KHIOPS_MPI_COMMAND: MPI command to call the Khiops tool" - echo "KHIOPS_JAVA_PATH: path of Java tool, to add in path" - echo "KHIOPS_CLASSPATH: Khiops java libraries, to add in classpath" - echo - echo "If they are not already defined, the following variables used by" - echo "Khiops are set:" - echo - echo "KHIOPS_LAST_RUN_DIR: directory where Khiops writes output command" - echo " file and log (when not defined with -e and -o)" - echo "KHIOPS_PROC_NUMBER: processes number launched by Khiops (it's" - echo " default value corresponds to the number of physical cores of" - echo " the computer plus one)" - echo - echo "The following variables are not defined by default and can be used to" - echo "change some default properties of Khiops:" - echo - echo "KHIOPS_TMP_DIR: Khiops' temporary directory location (default : the" - echo " system default) This location can be modified from the tool as well" - echo "KHIOPS_MEMORY_LIMIT: Khiops' memory limit in MB (default : the system's memory limit)." - echo " The minimum value is 100 MB; this setting is ignored if it is above the system's memory limit." - echo " It can only be reduced from the tool." - echo "KHIOPS_RAW_GUI: graphical user interface for file name selection" - echo " . default behavior if not set: depending on the file drivers available for Khiops" - echo " . set to 'true' to allow file name selection with uri schemas" - echo " . 
set to 'false' to allow local file name selection only with a file selection dialog" -} - -[[ $# == 0 ]] && help && exit 1 -[[ $# == 1 && $1 == "-h" ]] && help && exit 0 -[[ $# == 1 && $1 != "--env" ]] && help && exit 1 - - -if [[ -z $KHIOPS_LAST_RUN_DIR ]]; then - export KHIOPS_LAST_RUN_DIR=/tmp/khiops/$USER -fi - -# Try to use environment modules to locate mpiexec if needed -if ! command -v mpiexec &> /dev/null -then - if source /etc/profile.d/modules.sh &> /dev/null - then - module unload mpi - module load mpi/@MPI_IMPL@-x86_64 - fi -fi - -# Set java environment if Khiops GUI is installed -if [ -f "/usr/share/khiops/khiops.jar" ]; then - if [ -z "$JAVA_HOME" ] && [ -f /usr/bin/java ]; then - JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::" ) - fi - if [ ! -d "$JAVA_HOME" ]; then - echo "The JAVA_HOME directory doesn't exist (${JAVA_HOME})." - unset JAVA_HOME - fi - if [ -n "$JAVA_HOME" ]; then - KHIOPS_JAVA_PATH=$(find -L "$JAVA_HOME" -name libjvm.so | xargs dirname) - KHIOPS_CLASSPATH=/usr/share/khiops/norm.jar:/usr/share/khiops/khiops.jar - else - echo "Java not found. Khiops cannot launch a graphical interface. Please install a Java Runtime Environment (>= 1.8)." - fi -fi - -@ADDITIONAL_EN_VAR@ - -# Number of processes in use (must be set according to the physical cores number) -if [[ -z $KHIOPS_PROC_NUMBER ]]; then - KHIOPS_PROC_NUMBER=$(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l) -fi - -if command -v mpiexec &> /dev/null -then - KHIOPS_MPI_COMMAND="mpiexec @KHIOPS_MPI_EXTRA_FLAG@ @MPIEXEC_NUMPROC_FLAG@ $KHIOPS_PROC_NUMBER" -else - echo "We didn't find mpiexec in the regular path. 
Parallel computation is unavailable: Khiops is launched in serial" - KHIOPS_MPI_COMMAND="" -fi - -# without more than 2 procs, we use the serial khiops -if [[ $KHIOPS_PROC_NUMBER -le 2 ]]; then - KHIOPS_MPI_COMMAND="" -fi - -KHIOPS_PATH=/usr/bin - -echo KHIOPS_PATH "$KHIOPS_PATH" -echo KHIOPS_MPI_COMMAND "$KHIOPS_MPI_COMMAND" -echo KHIOPS_JAVA_PATH "$KHIOPS_JAVA_PATH" -echo KHIOPS_CLASSPATH "$KHIOPS_CLASSPATH" -echo KHIOPS_LAST_RUN_DIR "$KHIOPS_LAST_RUN_DIR" -echo KHIOPS_PROC_NUMBER "$KHIOPS_PROC_NUMBER" -echo KHIOPS_TMP_DIR "$KHIOPS_TMP_DIR" -echo KHIOPS_MEMORY_LIMIT "$KHIOPS_MEMORY_LIMIT" - diff --git a/packaging/linux/common/khiops_coclustering b/packaging/linux/common/khiops_coclustering index 6e20ae5f7..87374afb8 100644 --- a/packaging/linux/common/khiops_coclustering +++ b/packaging/linux/common/khiops_coclustering @@ -1,26 +1,90 @@ #!/bin/bash -# Run Khiops Coclustering tool -# see the khiops_env script for full documentation on the environment variables used by Khiops Coclustering +# Run Khiops tool +# see the khiops_env script for full documentation on the environment variables used by Khiops -if [[ ! -f /usr/bin/khiops-env ]]; then - echo "/usr/bin/khiops-env is missing" +error() { + if [ -f /dev/log ]; then + logger "$*" --tag khiops --stderr + else + echo "$*" >&2 + fi + if [ -n "$DISPLAY" ] && command -v notify-send >/dev/null 2>&1; then + notify-send "Khiops" "$*" -i /usr/share/pixmaps/khiops.png -u normal + fi + unset _IS_CONDA + exit 1 +} + +warning() { + if [ -f /dev/log ]; then + logger "$*" --tag khiops --stderr + else + echo "$*" >&2 + fi + if [ -n "$DISPLAY" ]; then + notify-send "Khiops" "$*" -i /usr/share/pixmaps/khiops.png -u normal + fi +} + +get_script_dir() { + SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + echo "$SCRIPT_DIR/" +} + +if [[ ! 
-f "$(get_script_dir)"/khiops_env ]]; then + echo "$(get_script_dir)/khiops_env is missing" exit 1 fi -# initialize Khiops env variables -source /usr/bin/khiops-env --env > /dev/null +# Initialize Khiops env variables +source $(get_script_dir)/khiops_env "" + +# Test if batch mode from parameters +KHIOPS_BATCH_MODE=false +for ((i = 1; i <= $#; i++)); do + if [ ${!i} = "-b" ] || [ ${!i} = "-h" ] || [ ${!i} = "-s" ] || [ ${!i} = "-v" ]; then + KHIOPS_BATCH_MODE=true + break + fi +done + +if [[ ! -e $KHIOPS_COCLUSTERING_PATH ]]; then + error "Binary not found ($KHIOPS_COCLUSTERING_PATH)" +fi export CLASSPATH=$CLASSPATH:$KHIOPS_CLASSPATH export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$KHIOPS_JAVA_PATH -if [[ $# -eq 0 ]]; then - # run without parameters - # run and save scenario and log files in directory KHIOPS_LAST_RUN_DIR - MODL_Coclustering -o "${KHIOPS_LAST_RUN_DIR}"/scenario._kh -e "${KHIOPS_LAST_RUN_DIR}"/log.txt +launch_khiops() { + if [[ $# -eq 0 ]]; then + # run without parameters + # run and save scenario and log files in directory KHIOPS_LAST_RUN_DIR + $KHIOPS_COCLUSTERING_PATH -o "${KHIOPS_LAST_RUN_DIR}"/scenario._kh -e "${KHIOPS_LAST_RUN_DIR}"/log.txt + else + # run with parameters + $KHIOPS_COCLUSTERING_PATH "$@" + fi +} + +# Launch Khiops +if [ "$KHIOPS_BATCH_MODE" = true ]; then + launch_khiops "$@" else - # run with parameters - MODL_Coclustering "$@" + if [ "$_IS_CONDA" = true ] || [ -z "$DISPLAY" ]; then + error "GUI is not available, please use the '-b' flag" + else + if [ -z "$KHIOPS_JAVA_PATH" ]; then + if [ -z "$KHIOPS_JAVA_ERROR" ]; then + error "GUI is not available, please either use the '-b' flag, or install the 'khiops' native package for your host operating system." + else + error "$KHIOPS_JAVA_ERROR" + fi + else + launch_khiops "$@" + fi + fi fi -exit $? \ No newline at end of file +unset _IS_CONDA +exit $? 
diff --git a/packaging/linux/common/khiops_env/README.md b/packaging/linux/common/khiops_env/README.md new file mode 100644 index 000000000..9c4c6fe9b --- /dev/null +++ b/packaging/linux/common/khiops_env/README.md @@ -0,0 +1,7 @@ +# khiops_env generation + +khiops_env is built by cmake at configuration time by replacing variables in the file khiops_env.in. These variables are surrounded by '@'. +Some variables contain bash lines so to improve readability, they are filled from the files: +- export_env_variables.sh +- java_settings.sh +- use_environment_module.sh.in diff --git a/packaging/linux/common/khiops_env/export_env_variables.sh b/packaging/linux/common/khiops_env/export_env_variables.sh new file mode 100644 index 000000000..f3dac616b --- /dev/null +++ b/packaging/linux/common/khiops_env/export_env_variables.sh @@ -0,0 +1,6 @@ + +# Export environment variables to MPI processes +for line in $(env | grep -E '^(KHIOPS|Khiops|AWS_|S3_|GOOGLE_)'); do + name=${line%%=*} + MPI_EXTRA_FLAGS="${MPI_EXTRA_FLAGS} -x ${name}" +done \ No newline at end of file diff --git a/packaging/linux/common/khiops_env/java_settings.sh b/packaging/linux/common/khiops_env/java_settings.sh new file mode 100644 index 000000000..413f063e7 --- /dev/null +++ b/packaging/linux/common/khiops_env/java_settings.sh @@ -0,0 +1,22 @@ + +# Set java environment if Khiops GUI is installed +unset KHIOPS_JAVA_ERROR +unset KHIOPS_JAVA_PATH +unset KHIOPS_CLASSPATH +if [ -f "/usr/share/khiops/khiops.jar" ]; then + if [[ -z $JAVA_HOME ]] && [ -f /usr/bin/java ]; then + JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::") + fi + if [[ ! -d $JAVA_HOME ]]; then + KHIOPS_JAVA_ERROR="The JAVA_HOME directory doesn't exist ($JAVA_HOME)." 
+ unset JAVA_HOME + else + JVM_PATH=$(find -L "$JAVA_HOME" -name libjvm.so 2>/dev/null) + if [[ -z $JVM_PATH ]]; then + KHIOPS_JAVA_ERROR="libjvm.so not found in JAVA_HOME ($JAVA_HOME)" + else + KHIOPS_JAVA_PATH=$(dirname "$JVM_PATH") + KHIOPS_CLASSPATH=/usr/share/khiops/norm.jar:/usr/share/khiops/khiops.jar + fi + fi +fi diff --git a/packaging/linux/common/khiops_env/khiops_env.in b/packaging/linux/common/khiops_env/khiops_env.in new file mode 100644 index 000000000..cb5629c21 --- /dev/null +++ b/packaging/linux/common/khiops_env/khiops_env.in @@ -0,0 +1,113 @@ +#!/bin/bash +help() { + echo "Usage: khiops_env [-h, --help] [--env]" + echo "khiops_env is an internal script intended to be used by Khiops tool and Khiops wrappers only." + echo "It sets all the environment variables required by the Khiops to run correctly (Java, MPI, etc)." + echo "Options:" + echo " -h, --help show this help message and exit" + echo " -env show the environment list and exit" + echo + echo "The following variables are used to set the path and classpath for the prerequisite of Khiops." 
+ echo + echo "KHIOPS_PATH: full path of Khiops executable" + echo "KHIOPS_COCLUSTERING_PATH: full path of Khiops coclustering executable" + echo "KHIOPS_MPI_COMMAND: MPI command to call the Khiops tool" + echo "KHIOPS_JAVA_PATH: path of Java tool, to add in path" + echo "KHIOPS_CLASSPATH: Khiops java libraries, to add in classpath" + echo "KHIOPS_DRIVERS_PATH: search path of the drivers (by default /usr/bin if not defined)" + echo + echo "If they are not already defined, the following variables used by Khiops are set:" + echo + echo "KHIOPS_LAST_RUN_DIR: directory where Khiops writes output command file and log" + echo " (when not defined with -e and -o)" + echo "KHIOPS_PROC_NUMBER: processes number launched by Khiops (it's default value corresponds to the" + echo " number of physical cores of the computer)" + echo + echo "The following variables are not defined by default and can be used to change some default" + echo " properties of Khiops:" + echo + echo "KHIOPS_TMP_DIR: Khiops temporary directory location (default: the system default)." + echo " This location can be modified from the tool as well." + echo "KHIOPS_MEMORY_LIMIT: Khiops memory limit in MB (default: the system memory limit)." + echo " The minimum value is 100 MB; this setting is ignored if it is above the system's memory limit." + echo " It can only be reduced from the tool." + echo "KHIOPS_RAW_GUI: graphical user interface for file name selection" + echo " . default behavior if not set: depending on the file drivers available for Khiops" + echo " . set to 'true' to allow file name selection with uri schemas" + echo " . set to 'false' to allow local file name selection only with a file selection dialog" + echo "KHIOPS_MPI_HOST_FILE: provide hostfile to mpi." + echo "KHIOPS_MPI_VERBOSE: true (default) or false, print messages from mpi (OpenMPI only)." + echo + echo "In case of configuration problems, the variables KHIOPS_JAVA_ERROR and KHIOPS_MPI_ERROR contain error messages." 
+} + +[[ $# == 1 && $1 == "-h" ]] && help && exit 0 +[[ $# == 1 && $1 == "--help" ]] && help && exit 0 + +if [[ -z $KHIOPS_LAST_RUN_DIR ]]; then + export KHIOPS_LAST_RUN_DIR=/tmp/khiops/$USER +fi +@USE_ENVIRONMENT_MODULE@ +@KHIOPS_JAVA_SETTINGS@ +get_script_dir() { + SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + echo "$SCRIPT_DIR/" +} +@ADDITIONAL_ENV_VAR@ +@IS_CONDA_VAR@ +@SET_KHIOPS_DRIVERS_PATH@ + +if [ -z "$KHIOPS_MPI_VERBOSE" ]; then + KHIOPS_MPI_VERBOSE="true" +fi + +if ! [ "$KHIOPS_MPI_VERBOSE" == "true" ]; then + # Mute mpi by adding flags (OpenMPI only) + MPI_EXTRA_FLAGS="@KHIOPS_MPI_QUIET@" +fi + +# Setting up mpi for multiple machines +if [[ -n $KHIOPS_MPI_HOST_FILE ]]; then + MPI_EXTRA_FLAGS="$MPI_EXTRA_FLAGS @MPIEXEC_HOSTFILE_FLAG@ $KHIOPS_MPI_HOST_FILE" + @EXPORT_ENV_VARIABLES@ +fi + +KHIOPS_PATH=@KHIOPS_PATH@@MODL_NAME@ +KHIOPS_COCLUSTERING_PATH=@KHIOPS_COCLUSTERING_PATH@MODL_Coclustering + +# Number of processes in use (must be set according to the physical cores number) +if [[ -z $KHIOPS_PROC_NUMBER ]]; then + KHIOPS_PROC_NUMBER=$("@GET_PROC_NUMBER_PATH@"_khiopsgetprocnumber | head -n 1) +fi + +if command -v mpiexec &>/dev/null; then + KHIOPS_MPI_ERROR="" + KHIOPS_MPI_COMMAND="$(type -P mpiexec) $MPI_EXTRA_FLAGS @KHIOPS_MPI_EXTRA_FLAG@ @MPIEXEC_NUMPROC_FLAG@ $KHIOPS_PROC_NUMBER" +else + KHIOPS_MPI_ERROR="We didn't find mpiexec in the regular path. 
Parallel computation is unavailable: Khiops is launched in serial" + KHIOPS_MPI_COMMAND="" +fi + +# without more than 2 procs, we use the serial khiops +if [[ $KHIOPS_PROC_NUMBER -le 2 ]]; then + KHIOPS_MPI_COMMAND="" +fi + +if [ "$1" = "--env" ]; then + echo KHIOPS_PATH "$KHIOPS_PATH" + echo KHIOPS_COCLUSTERING_PATH "$KHIOPS_COCLUSTERING_PATH" + echo KHIOPS_MPI_COMMAND "$KHIOPS_MPI_COMMAND" + echo KHIOPS_JAVA_PATH "$KHIOPS_JAVA_PATH" + echo KHIOPS_CLASSPATH "$KHIOPS_CLASSPATH" + echo KHIOPS_LAST_RUN_DIR "$KHIOPS_LAST_RUN_DIR" + echo KHIOPS_PROC_NUMBER "$KHIOPS_PROC_NUMBER" + echo KHIOPS_TMP_DIR "$KHIOPS_TMP_DIR" + echo KHIOPS_MEMORY_LIMIT "$KHIOPS_MEMORY_LIMIT" + echo KHIOPS_API_MODE "$KHIOPS_API_MODE" + echo KHIOPS_RAW_GUI "$KHIOPS_RAW_GUI" + echo KHIOPS_DRIVERS_PATH "$KHIOPS_DRIVERS_PATH" + echo KHIOPS_JAVA_ERROR "$KHIOPS_JAVA_ERROR" + echo KHIOPS_MPI_ERROR "$KHIOPS_MPI_ERROR" + echo KHIOPS_MPI_VERBOSE "$KHIOPS_MPI_VERBOSE" + @ADDITIONAL_ENV_VAR_DISPLAY@ +fi diff --git a/packaging/linux/common/khiops_env/use_environment_module.sh.in b/packaging/linux/common/khiops_env/use_environment_module.sh.in new file mode 100644 index 000000000..40c9c2567 --- /dev/null +++ b/packaging/linux/common/khiops_env/use_environment_module.sh.in @@ -0,0 +1,7 @@ + +# Load @MPI_IMPL@ with environment modules +if source /etc/profile.d/modules.sh &> /dev/null +then + module unload mpi + module load mpi/@MPI_IMPL@-@CMAKE_SYSTEM_PROCESSOR@ +fi \ No newline at end of file diff --git a/packaging/packaging.cmake b/packaging/packaging.cmake index a7ebb6848..ea02515ee 100644 --- a/packaging/packaging.cmake +++ b/packaging/packaging.cmake @@ -125,7 +125,7 @@ elseif("${MPI_IMPL}" STREQUAL "intel") set(CPACK_DEBIAN_KHIOPS_CORE_PACKAGE_DEPENDS "intel-hpckit") endif() set(CPACK_DEBIAN_KHIOPS_PACKAGE_DEPENDS - "khiops-core (=${KHIOPS_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}), default-jre (>=1.8)") + "khiops-core (=${KHIOPS_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}), default-jre (>=1.8), 
libnotify-bin")
 set(CPACK_DEBIAN_KNI_TRANSFER_PACKAGE_DEPENDS "kni (=${KHIOPS_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE})")
 # packages recommends
@@ -175,7 +175,7 @@ set(CPACK_RPM_KHIOPS_CORE_PACKAGE_OBSOLETES "khiops-core <= 10.2.1-2")
 # packages requires
 set(CPACK_RPM_KHIOPS_PACKAGE_REQUIRES "khiops-core = ${KHIOPS_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}")
-set(CPACK_RPM_KHIOPS_PACKAGE_REQUIRES "java >= 1.8")
+set(CPACK_RPM_KHIOPS_PACKAGE_REQUIRES "khiops-core = ${KHIOPS_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}, java >= 1.8, libnotify")
 set(CPACK_RPM_KHIOPS_CORE_PACKAGE_REQUIRES "util-linux")
 set(CPACK_RPM_KNI_TRANSFER_PACKAGE_REQUIRES "kni")
diff --git a/packaging/windows/khiops.cmd b/packaging/windows/khiops.cmd
new file mode 100644
index 000000000..40922d165
--- /dev/null
+++ b/packaging/windows/khiops.cmd
@@ -0,0 +1,93 @@
+@echo off
+setlocal
+
+REM ========================================================
+REM See the khiops_env script for full documentation on the
+REM environment variables used by Khiops
+REM ========================================================
+
+REM ========================================================
+REM Initialization of the installation directory of Khiops
+
+REM Test if khiops_env is present
+if not exist "%~dp0khiops_env.cmd" goto ERR_PATH_1
+
+REM Initialize Khiops env variables
+call "%~dp0khiops_env"
+
+REM Test if Khiops environment is already set up
+if not exist "%KHIOPS_PATH%" goto ERR_PATH_2
+
+REM display mpi configuration problems if any
+if not "%KHIOPS_MPI_ERROR%". == "". echo %KHIOPS_MPI_ERROR%
+
+REM Test if batch mode from parameters
+set KHIOPS_BATCH_MODE=false
+for %%i in (%*) do (
+  for %%f in ("-h" "-b" "-s" "-v") do if /I "%%~i"=="%%~f" (
+    set KHIOPS_BATCH_MODE=true
+    goto BREAK_LOOP
+  )
+)
+:BREAK_LOOP
+
+if "%KHIOPS_BATCH_MODE%" == "true" if not "%KHIOPS_JAVA_ERROR%". == "".
goto ERR_JAVA
+if "%_IS_CONDA%" == "true" if not "%KHIOPS_BATCH_MODE%" == "true" goto ERR_CONDA
+
+REM Set path
+set path=%~dp0;%KHIOPS_JAVA_PATH%;%path%
+set classpath=%KHIOPS_CLASSPATH%;%classpath%
+
+REM unset local variables
+set "KHIOPS_BATCH_MODE="
+set "_IS_CONDA="
+
+REM ========================================================
+REM Start Khiops (with or without parameters)
+
+if %1.==. goto NOPARAMS
+if not %1.==. goto PARAMS
+
+REM Start without parameters
+:NOPARAMS
+if not exist "%KHIOPS_LAST_RUN_DIR%" md "%KHIOPS_LAST_RUN_DIR%"
+if not exist "%KHIOPS_LAST_RUN_DIR%" goto PARAMS
+
+%KHIOPS_MPI_COMMAND% "%KHIOPS_PATH%" -o "%KHIOPS_LAST_RUN_DIR%\scenario._kh" -e "%KHIOPS_LAST_RUN_DIR%\log.txt"
+if %errorlevel% EQU 0 goto END
+goto ERR_RETURN_CODE
+
+REM Start with parameters
+:PARAMS
+%KHIOPS_MPI_COMMAND% "%KHIOPS_PATH%" %*
+if %errorlevel% EQU 0 goto END
+goto ERR_RETURN_CODE
+
+REM ========================================================
+REM Error messages
+
+:ERR_PATH_1
+start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "khiops_env.cmd is missing in directory %~dp0"
+exit /b 1
+
+:ERR_PATH_2
+start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "Incorrect installation directory for Khiops (File %KHIOPS_PATH% not found)"
+exit /b 1
+
+:ERR_RETURN_CODE
+start "KHIOPS EXECUTION PROBLEM" cmd /k "echo ERROR Khiops ended with return code %errorlevel% & echo Contents of the log file at %KHIOPS_LAST_RUN_DIR%\log.txt: & type %KHIOPS_LAST_RUN_DIR%\log.txt"
+goto END
+
+:ERR_JAVA
+start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "%KHIOPS_JAVA_ERROR%"
+exit /b 1
+
+:ERR_CONDA
+echo GUI is not available, please use the '-b' flag
+exit /b 1
+
+:END
+endlocal
+
+REM Return 1 if fatal error, 0 otherwise
+exit /b %errorlevel%
\ No newline at end of file
diff --git a/packaging/windows/khiops_coclustering.cmd b/packaging/windows/khiops_coclustering.cmd
new file mode 100644
index 000000000..9e251d1d8
--- /dev/null
+++ b/packaging/windows/khiops_coclustering.cmd
@@ -0,0 +1,91 @@
+@echo off
+setlocal
+
+REM ========================================================
+REM See the khiops_env script for full documentation on the
+REM environment variables used by Khiops
+REM ========================================================
+
+REM ========================================================
+REM Initialization of the installation directory of Khiops
+
+REM Test if khiops_env is present
+if not exist "%~dp0khiops_env.cmd" goto ERR_PATH_1
+
+REM Initialize Khiops env variables
+call "%~dp0khiops_env"
+
+REM Test if Khiops environment is already set up
+if not exist "%KHIOPS_COCLUSTERING_PATH%" goto ERR_PATH_2
+
+REM Test if batch mode from parameters
+set KHIOPS_BATCH_MODE=false
+for %%i in (%*) do (
+  for %%f in ("-h" "-b" "-s" "-v") do if /I "%%~i"=="%%~f" (
+    set KHIOPS_BATCH_MODE=true
+    goto BREAK_LOOP
+  )
+)
+:BREAK_LOOP
+
+if "%KHIOPS_BATCH_MODE%" == "true" if not "%KHIOPS_JAVA_ERROR%". == "". goto ERR_JAVA
+if "%_IS_CONDA%" == "true" if not "%KHIOPS_BATCH_MODE%" == "true" goto ERR_CONDA
+
+
+REM Set path
+set path=%~dp0;%KHIOPS_JAVA_PATH%;%path%
+set classpath=%KHIOPS_CLASSPATH%;%classpath%
+
+REM unset local variables
+set "KHIOPS_BATCH_MODE="
+set "_IS_CONDA="
+
+REM ========================================================
+REM Start Khiops (with or without parameters)
+
+if %1.==. goto NOPARAMS
+if not %1.==.
goto PARAMS + +REM Start without parameters +:NOPARAMS +if not exist "%KHIOPS_LAST_RUN_DIR%" md "%KHIOPS_LAST_RUN_DIR%" +if not exist "%KHIOPS_LAST_RUN_DIR%" goto PARAMS + +"%KHIOPS_COCLUSTERING_PATH%" -o "%KHIOPS_LAST_RUN_DIR%\scenario._kh" -e "%KHIOPS_LAST_RUN_DIR%\log.txt" +if %errorlevel% EQU 0 goto END +goto ERR_RETURN_CODE + +REM Start with parameters +:PARAMS +"%KHIOPS_COCLUSTERING_PATH%" %* +if %errorlevel% EQU 0 goto END +goto ERR_RETURN_CODE + +REM ======================================================== +REM Error messages + +:ERR_PATH_1 +start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "khiops_env.cmd is missing in directory %~dp0" +exit /b 1 + +:ERR_PATH_2 +start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "Incorrect installation directory for Khiops (File %KHIOPS_COCLUSTERING_PATH% not found)" +exit /b 1 + +:ERR_RETURN_CODE +start "KHIOPS EXECUTION PROBLEM" cmd /k "echo ERROR Khiops ended with return code %errorlevel% & echo Contents of the log file at %KHIOPS_LAST_RUN_DIR%\log.txt: & type %KHIOPS_LAST_RUN_DIR%\log.txt" +goto END + +:ERR_JAVA +start "KHIOPS CONFIGURATION PROBLEM" echo ERROR "%KHIOPS_JAVA_ERROR%" +exit /b 1 + +:ERR_CONDA +echo GUI is not available, please use the '-b' flag +exit /b 1 + +:END +endlocal + +REM Return 1 if fatal error, 0 otherwise +exit /b %errorlevel% \ No newline at end of file diff --git a/packaging/windows/khiops_env.cmd.in b/packaging/windows/khiops_env.cmd.in new file mode 100644 index 000000000..010bbc598 --- /dev/null +++ b/packaging/windows/khiops_env.cmd.in @@ -0,0 +1,164 @@ +@echo off + +if %1.==--env. goto DISPLAY_ENV +if %*.==. goto SET_ENV + +:HELP +echo Usage: khiops_env [-h, --help] [--env] +echo khiops_env is an internal script intended to be used by Khiops tool and Khiops wrappers only. +echo It sets all the environment variables required by the Khiops to run correctly (Java, MPI, etc). +echo Options: +echo -h, --help show this help message and exit +echo -env show the environment list and exit +echo. 
+echo The following variables are used to set the path and classpath for the prerequisite of Khiops. +echo. +echo KHIOPS_PATH: full path of Khiops executable +echo KHIOPS_COCLUSTERING_PATH: full path of Khiops coclustering executable +echo KHIOPS_MPI_COMMAND: MPI command to call the Khiops tool +echo KHIOPS_JAVA_PATH: path of Java tool, to add in path +echo KHIOPS_CLASSPATH: Khiops java libraries, to add in classpath +echo KHIOPS_DRIVERS_PATH: search path of the drivers (by default Khiops bin directory if not defined) +echo. +echo If they are not already defined, the following variables used by Khiops are set: +echo. +echo KHIOPS_LAST_RUN_DIR: directory where Khiops writes output command file and log +echo (when not defined with -e and -o) +echo KHIOPS_PROC_NUMBER: processes number launched by Khiops (it's default value corresponds to the +echo number of physical cores of the computer) +echo. +echo The following variables are not defined by default and can be used to change some default +echo properties of Khiops: +echo. +echo KHIOPS_TMP_DIR: Khiops temporary directory location (default: the system default). +echo This location can be modified from the tool as well. +echo KHIOPS_MEMORY_LIMIT: Khiops memory limit in MB (default: the system memory limit). +echo The minimum value is 100 MB; this setting is ignored if it is above the system's memory limit. +echo It can only be reduced from the tool. +echo KHIOPS_RAW_GUI: graphical user interface for file name selection +echo . default behavior if not set: depending on the file drivers available for Khiops +echo . set to 'true' to allow file name selection with uri schemas +echo . set to 'false' to allow local file name selection only with a file selection dialog +echo. +echo In case of configuration problems, the variables KHIOPS_JAVA_ERROR and KHIOPS_MPI_ERROR contain error messages. + +if not %2.==. exit /b 1 +if %1.==-h. exit /b 0 +if %1.==--help. 
exit /b 0 +exit /b 1 + +REM Set Khiops environment variables +:DISPLAY_ENV +setlocal +set DISPLAY_ENV=true + +:SET_ENV +REM Initialize exported variables +set "KHIOPS_PATH=" +set "KHIOPS_COCLUSTERING_PATH=" +set "KHIOPS_MPI_COMMAND=" +set "KHIOPS_JAVA_PATH=" +set "KHIOPS_CLASSPATH=" +set "KHIOPS_JAVA_ERROR=" +set "KHIOPS_MPI_ERROR=" + +REM Set Khiops home to parent directory +for %%a in ("%~dp0..") do set "_KHIOPS_HOME=%%~fa" + +REM KHIOPS_PATH +set "KHIOPS_PATH=%_KHIOPS_HOME%\bin\MODL.exe" +set "KHIOPS_COCLUSTERING_PATH=%_KHIOPS_HOME%\bin\MODL_Coclustering.exe" + +REM KHIOPS_LAST_RUN_DIR +if "%KHIOPS_LAST_RUN_DIR%". == "". set "KHIOPS_LAST_RUN_DIR=%USERPROFILE%\khiops_data\lastrun" + +REM KHIOPS_PROC_NUMBER +if "%KHIOPS_PROC_NUMBER%". == "". for /f %%i in ('"%~dp0_khiopsgetprocnumber"') do set "KHIOPS_PROC_NUMBER=%%i" +if "%KHIOPS_PROC_NUMBER%". == "". set "KHIOPS_PROC_NUMBER=1" + +REM Set MPI binary (mpiexec) +if %KHIOPS_PROC_NUMBER% LEQ 2 goto MPI_DONE +goto @SET_MPI@ + +:MPI_PARAMS +REM Add the MPI parameters +if not "%KHIOPS_MPI_COMMAND%." == "." 
set "KHIOPS_MPI_COMMAND="%KHIOPS_MPI_COMMAND%" -n %KHIOPS_PROC_NUMBER%" +:MPI_DONE + +set _KHIOPS_GUI=@GUI_STATUS@ +if "%_KHIOPS_GUI%" == "false" GOTO SKIP_GUI + +REM Set Java environment +set _JAVA_ERROR=false +if not exist "%_KHIOPS_HOME%\jre\bin\server\" set _JAVA_ERROR=true +if not exist "%_KHIOPS_HOME%\jre\bin\" set _JAVA_ERROR=true + +if "%_JAVA_ERROR%" == "false" ( + set "KHIOPS_JAVA_PATH=%_KHIOPS_HOME%\jre\bin\server\;%_KHIOPS_HOME%\jre\bin\" +) else set "KHIOPS_JAVA_ERROR=The JRE is missing in Khiops home directory, please reinstall Khiops" + +REM KHIOPS_CLASSPATH +set "KHIOPS_CLASSPATH=%_KHIOPS_HOME%\bin\norm.jar" +set "KHIOPS_CLASSPATH=%_KHIOPS_HOME%\bin\khiops.jar;%KHIOPS_CLASSPATH%" + +:SKIP_GUI + +@IS_CONDA_VAR@ + +@SET_KHIOPS_DRIVERS_PATH@ + +REM unset local variables +set "_KHIOPS_GUI=" +set "_JAVA_ERROR=" +set "_KHIOPS_HOME=" + +if not "%DISPLAY_ENV%" == "true" exit /b 0 + +REM Print the environment list on the standard output +echo KHIOPS_PATH %KHIOPS_PATH% +echo KHIOPS_COCLUSTERING_PATH %KHIOPS_COCLUSTERING_PATH% +echo KHIOPS_MPI_COMMAND %KHIOPS_MPI_COMMAND% +echo KHIOPS_JAVA_PATH %KHIOPS_JAVA_PATH% +echo KHIOPS_CLASSPATH %KHIOPS_CLASSPATH% +echo KHIOPS_LAST_RUN_DIR %KHIOPS_LAST_RUN_DIR% +echo KHIOPS_PROC_NUMBER %KHIOPS_PROC_NUMBER% +echo KHIOPS_TMP_DIR %KHIOPS_TMP_DIR% +echo KHIOPS_MEMORY_LIMIT %KHIOPS_MEMORY_LIMIT% +echo KHIOPS_RAW_GUI %KHIOPS_RAW_GUI% +echo KHIOPS_DRIVERS_PATH %KHIOPS_DRIVERS_PATH% +echo KHIOPS_JAVA_ERROR %KHIOPS_JAVA_ERROR% +echo KHIOPS_MPI_ERROR %KHIOPS_MPI_ERROR% +endlocal +exit /b 0 + +REM Set mpiexec path for conda installation +:SET_MPI_CONDA +set "KHIOPS_MPI_COMMAND=%_KHIOPS_HOME%\Library\bin\mpiexec.exe" +if not exist "%KHIOPS_MPI_COMMAND%" goto ERR_MPI +goto MPI_PARAMS + +REM Set mpiexec path for system wide installation +:SET_MPI_SYSTEM_WIDE +REM ... with the standard variable MSMPI_BIN +set "KHIOPS_MPI_COMMAND=%MSMPI_BIN%mpiexec.exe" +if exist "%KHIOPS_MPI_COMMAND%" goto MPI_PARAMS +REM ... 
if MSMPI_BIN is not correctly defined +REM ... we try to call directly mpiexec (assuming its path is in the 'path' variable) +set "KHIOPS_MPI_COMMAND=mpiexec" +where /q "%KHIOPS_MPI_COMMAND%" +if ERRORLEVEL 1 goto ERR_MPI +REM ... finally we check if it is the good MPI implementation: "Microsoft MPI" +"%KHIOPS_MPI_COMMAND%" | findstr /c:"Microsoft MPI" > nul +if ERRORLEVEL 1 goto ERR_MPI_IMPL +goto MPI_PARAMS + + +:ERR_MPI +set "KHIOPS_MPI_ERROR=We didn't find mpiexec in the regular path. Parallel computation is unavailable: Khiops is launched in serial" +set "KHIOPS_MPI_COMMAND=" +goto MPI_DONE + +:ERR_MPI_IMPL +set "KHIOPS_MPI_ERROR=We can't find the right implementation of mpiexec, we expect to find Microsoft MPI. Parallel computation is unavailable: Khiops is launched in serial" +set "KHIOPS_MPI_COMMAND=" +goto MPI_DONE diff --git a/packaging/windows/nsis/CreateKhiopsCmdFileFunc.nsh b/packaging/windows/nsis/CreateKhiopsCmdFileFunc.nsh deleted file mode 100644 index df287a991..000000000 --- a/packaging/windows/nsis/CreateKhiopsCmdFileFunc.nsh +++ /dev/null @@ -1,122 +0,0 @@ -!include "FileFunc.nsh" -!include "x64.nsh" - -# Macro to create the khiops.cmd script -# Example: -# ${CreateKhiopsCmdFile} "$INSTDIR\khiops.cmd" "MODL" "" "$INSTDIR" "scenario._kh" "log.txt" "1" -# -!define CreateKhiopsCmdFile "!insertmacro CreateKhiopsCmdFile" -!macro CreateKhiopsCmdFile FileName ToolName BinSuffix KhiopsHome ScenarioFileName LogFileName ParallelMode - Push "${ParallelMode}" - Push "${LogFileName}" - Push "${ScenarioFileName}" - Push "${KhiopsHome}" - Push "${BinSuffix}" - Push "${ToolName}" - Push "${FileName}" - Call CreateKhiopsCmdFile -!macroend - - -Function CreateKhiopsCmdFile - # Function parameters - Var /GLOBAL _FileName - Var /GLOBAL _ToolName - Var /GLOBAL _BinSuffix - Var /GLOBAL _KhiopsHome - Var /GLOBAL _ScenarioFileName - Var /GLOBAL _LogFileName - Var /GLOBAL _ParallelMode - - # Retrieve parameters from stack - Pop $_FileName - Pop $_ToolName - Pop 
$_BinSuffix - Pop $_KhiopsHome - Pop $_ScenarioFileName - Pop $_LogFileName - Pop $_ParallelMode - - # Open file to create - FileOpen $0 "$_FileName" w - - # Write file - FileWrite $0 `@echo off$\r$\n` - FileWrite $0 `setlocal$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM ========================================================$\r$\n` - FileWrite $0 `REM See the khiops_env script for full documentation on the$\r$\n` - FileWrite $0 `REM environment variables used by Khiops$\r$\n` - FileWrite $0 `REM ========================================================$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM ========================================================$\r$\n` - FileWrite $0 `REM Initialization of the installation directory of Khiops$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Test is Khiops environment already set up$\r$\n` - FileWrite $0 `if "%KHIOPS_HOME%".=="". set KHIOPS_HOME=$_KhiopsHome$\r$\n` - FileWrite $0 `if not exist "%KHIOPS_HOME%\bin$_BinSuffix\$_ToolName.exe" goto ERR_PATH$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Test if batch mode from parameters$\r$\n` - FileWrite $0 `set _KHIOPS_BATCH_MODE=$\r$\n` - FileWrite $0 `for %%i in (%*) do if %%i.==-b. set _KHIOPS_BATCH_MODE=true$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Initialize Khiops env variables$\r$\n` - FileWrite $0 `call "%KHIOPS_HOME%\bin\khiops_env" --env > NUL$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Set path$\r$\n` - FileWrite $0 `set path=%KHIOPS_PATH%;%KHIOPS_JAVA_PATH%;%path%$\r$\n` - FileWrite $0 `set classpath=%KHIOPS_CLASSPATH%;%classpath%$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM ========================================================$\r$\n` - FileWrite $0 `REM Start Khiops (with or without parameteres)$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `if %1.==. goto NOPARAMS$\r$\n` - FileWrite $0 `if not %1.==. 
goto PARAMS$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Start without parameters$\r$\n` - FileWrite $0 `:NOPARAMS$\r$\n` - FileWrite $0 `if not exist "%KHIOPS_LAST_RUN_DIR%" md "%KHIOPS_LAST_RUN_DIR%"$\r$\n` - FileWrite $0 `if not exist "%KHIOPS_LAST_RUN_DIR%" goto PARAMS$\r$\n` - ${If} $_ParallelMode == "0" - FileWrite $0 `"%KHIOPS_PATH%$_BinSuffix\$_ToolName" -o "%KHIOPS_LAST_RUN_DIR%\$_ScenarioFileName" -e "%KHIOPS_LAST_RUN_DIR%\$_LogFileName"$\r$\n` - ${Else} - FileWrite $0 `%KHIOPS_MPI_COMMAND% "%KHIOPS_PATH%$_BinSuffix\$_ToolName" -o "%KHIOPS_LAST_RUN_DIR%\$_ScenarioFileName" -e "%KHIOPS_LAST_RUN_DIR%\$_LogFileName"$\r$\n` - ${EndIf} - FileWrite $0 `if %errorlevel% EQU 0 goto END$\r$\n` - FileWrite $0 `if %errorlevel% EQU 2 goto END$\r$\n` - FileWrite $0 `goto ERR_RETURN_CODE$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Start with parameters$\r$\n` - FileWrite $0 `:PARAMS$\r$\n` - ${If} $_ParallelMode == "0" - FileWrite $0 `"%KHIOPS_PATH%$_BinSuffix\$_ToolName" %*$\r$\n` - ${Else} - FileWrite $0 `%KHIOPS_MPI_COMMAND% "%KHIOPS_PATH%$_BinSuffix\$_ToolName" %*$\r$\n` - ${EndIf} - FileWrite $0 `if %errorlevel% EQU 0 goto END$\r$\n` - FileWrite $0 `if %errorlevel% EQU 2 goto END$\r$\n` - FileWrite $0 `goto ERR_RETURN_CODE$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM ========================================================$\r$\n` - FileWrite $0 `REM Error messages$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `:ERR_PATH$\r$\n` - FileWrite $0 `start "KHIOPS CONFIG PROBLEM" echo ERROR Incorrect installation directory for Khiops (File $_ToolName.exe not found in directory %KHIOPS_PATH%$_BinSuffix)$\r$\n` - FileWrite $0 `exit /b 1$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `:ERR_RETURN_CODE$\r$\n` - FileWrite $0 `start "KHIOPS EXECUTION PROBLEM" cmd /k "echo ERROR Khiops ended with return code %errorlevel% & echo Contents of the log file at %KHIOPS_LAST_RUN_DIR%\$_LogFileName: & type 
%KHIOPS_LAST_RUN_DIR%\$_LogFileName"$\r$\n` - FileWrite $0 `goto END$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `:END$\r$\n` - FileWrite $0 `endlocal$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Return 1 if fatal error, 2 if error(s), 0 otherwise$\r$\n` - FileWrite $0 `exit /b %errorlevel%$\r$\n` - - # Close file - FileClose $0 -FunctionEnd diff --git a/packaging/windows/nsis/CreateKhiopsEnvCmdFileFunc.nsh b/packaging/windows/nsis/CreateKhiopsEnvCmdFileFunc.nsh deleted file mode 100644 index 8cb2d43ee..000000000 --- a/packaging/windows/nsis/CreateKhiopsEnvCmdFileFunc.nsh +++ /dev/null @@ -1,140 +0,0 @@ -!include "FileFunc.nsh" -!include "x64.nsh" - -# Macro to create the khiops_env.cmd script -# Example: -# ${CreateKhiopsEnvCmdFile} "$INSTDIR\khiops_env.cmd" "$INSTDIR" "4" -# -!define CreateKhiopsEnvCmdFile "!insertmacro CreateKhiopsEnvCmdFile" -!macro CreateKhiopsEnvCmdFile FileName KhiopsHome PhysicalCoresNumber - Push "${PhysicalCoresNumber}" - Push "${KhiopsHome}" - Push "${FileName}" - Call CreateKhiopsEnvCmdFile -!macroend - -# Function to be used with the macro defined above -Function CreateKhiopsEnvCmdFile - # Define function parameters - Var /GLOBAL _EnvFileName - Var /GLOBAL _EnvKhiopsHome - Var /GLOBAL _EnvProcessNumber - - # Retrieve parameters from stack - Pop $_EnvFileName - Pop $_EnvKhiopsHome - Pop $_EnvProcessNumber - - # Open the file to create - FileOpen $0 "$_EnvFileName" w - - # Write file contents - FileWrite $0 `@echo off$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `if %1.==--env. 
goto SET_ENV$\r$\n` - FileWrite $0 `goto HELP$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `:HELP$\r$\n` - FileWrite $0 `echo Usage: khiops-env [-h] [--env]$\r$\n` - FileWrite $0 `echo khiops-env is an internal script intended to be used by Khiops tool and Khiops'$\r$\n` - FileWrite $0 `echo wrappers only.$\r$\n` - FileWrite $0 `echo If the --env flag is used, the environment list is printed on the standard output$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo The following variables are used to set the path and classpath$\r$\n` - FileWrite $0 `echo for the prerequisite of Khiops.$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo KHIOPS_HOME: home directory of Khiops, on Windows only$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo KHIOPS_PATH: path of Khiops' executable, to add in path$\r$\n` - FileWrite $0 `echo KHIOPS_MPI_COMMAND: MPI command to call the Khiops tool$\r$\n` - FileWrite $0 `echo KHIOPS_JAVA_PATH: directory of the jvm.dll, to add in path$\r$\n` - FileWrite $0 `echo KHIOPS_CLASSPATH: Khiops java libraries, to add in classpath$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo If they are not already defined, the following variables used by$\r$\n` - FileWrite $0 `echo Khiops are set :$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo KHIOPS_LAST_RUN_DIR: directory where Khiops writes output command$\r$\n` - FileWrite $0 `echo file and log (when not defined with -e and -o)$\r$\n` - FileWrite $0 `echo KHIOPS_PROC_NUMBER: processes number launched by Khiops (it's$\r$\n` - FileWrite $0 `echo default value corresponds to the number of physical cores of$\r$\n` - FileWrite $0 `echo the computer plus one)$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo The following variables are not defined by default and can be used to$\r$\n` - FileWrite $0 `echo change some default properties of Khiops:$\r$\n` - FileWrite $0 `echo.$\r$\n` - FileWrite $0 `echo KHIOPS_TMP_DIR: Khiops' temporary directory location (default : 
the$\r$\n` - FileWrite $0 `echo system's default) This location can be modified from the tool as well$\r$\n` - FileWrite $0 `echo KHIOPS_MEMORY_LIMIT: Khiops' memory limit in MB (default : the system's memory limit).$\r$\n` - FileWrite $0 `echo The minimum value is 100 MB; this setting is ignored if it is above the system's memory limit.$\r$\n` - FileWrite $0 `echo It can only be reduced from the tool.$\r$\n` - FileWrite $0 `echo KHIOPS_API_MODE: standard or api mode for the management of output result files created by Khiops$\r$\n` - FileWrite $0 `echo In standard mode, the result files are stored in the train database directory,$\r$\n` - FileWrite $0 `echo unless an absolute path is specified, and the file extension is forced if necessary.$\r$\n` - FileWrite $0 `echo In api mode, the result files are stored in the current working directory, using the specified results as is.$\r$\n` - FileWrite $0 `echo . default behavior if not set: standard mode$\r$\n` - FileWrite $0 `echo . set to 'true' to force standard mode$\r$\n` - FileWrite $0 `echo . set to 'false' to force api mode$\r$\n` - FileWrite $0 `echo KHIOPS_RAW_GUI: graphical user interface for file name selection$\r$\n` - FileWrite $0 `echo . default behavior if not set: depending on the file drivers available for Khiops$\r$\n` - FileWrite $0 `echo . set to 'true' to allow file name selection with uri schemas$\r$\n` - FileWrite $0 `echo . set to 'false' to allow local file name selection only with a file selection dialog$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `if not %2.==. exit /b 1$\r$\n` - FileWrite $0 `if %1.==-h. exit /b 0$\r$\n` - FileWrite $0 `exit /b 1$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Set Khiops env variables$\r$\n` - FileWrite $0 `:SET_ENV$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Test if Khiops environment already set up$\r$\n` - FileWrite $0 `if "%KHIOPS_HOME%".=="". 
set KHIOPS_HOME=$_EnvKhiopsHome$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM KHIOPS_PATH$\r$\n` - FileWrite $0 `set KHIOPS_PATH=%KHIOPS_HOME%\bin$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM KHIOPS_CLASSPATH$\r$\n` - FileWrite $0 `set KHIOPS_CLASSPATH=%KHIOPS_HOME%\bin\norm.jar$\r$\n` - FileWrite $0 `set KHIOPS_CLASSPATH=%KHIOPS_HOME%\bin\khiops.jar;%KHIOPS_CLASSPATH%$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM KHIOPS_LAST_RUN_DIR$\r$\n` - FileWrite $0 `if "%KHIOPS_LAST_RUN_DIR%".=="". set KHIOPS_LAST_RUN_DIR=%USERPROFILE%\khiops_data\lastrun$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM KHIOPS_PROC_NUMBER$\r$\n` - FileWrite $0 `if "%KHIOPS_PROC_NUMBER%".=="". set KHIOPS_PROC_NUMBER=$_EnvProcessNumber$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM KHIOPS_MPI_COMMAND$\r$\n` - FileWrite $0 `REM Priority$\r$\n` - FileWrite $0 `REM 0: Idle$\r$\n` - FileWrite $0 `REM 1: Below normal$\r$\n` - FileWrite $0 `REM 2: Normal$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `set KHIOPS_MPI_COMMAND="%MSMPI_BIN%mpiexec" -al spr:P -n %KHIOPS_PROC_NUMBER% /priority 1$\r$\n` - FileWrite $0 `if %KHIOPS_PROC_NUMBER%==1 set KHIOPS_MPI_COMMAND=$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `$\r$\n` - FileWrite $0 `REM Set Java environment$\r$\n` - FileWrite $0 `set KHIOPS_JAVA_PATH=%KHIOPS_HOME%\jre\bin\server\;%KHIOPS_HOME%\jre\bin\$\r$\n` - FileWrite $0 `REM Print the environment list on the standard output$\r$\n` - FileWrite $0 `echo KHIOPS_HOME %KHIOPS_HOME%$\r$\n` - FileWrite $0 `echo KHIOPS_PATH %KHIOPS_PATH%$\r$\n` - FileWrite $0 `echo KHIOPS_MPI_COMMAND %KHIOPS_MPI_COMMAND%$\r$\n` - FileWrite $0 `echo KHIOPS_JAVA_PATH %KHIOPS_JAVA_PATH%$\r$\n` - FileWrite $0 `echo KHIOPS_CLASSPATH %KHIOPS_CLASSPATH%$\r$\n` - FileWrite $0 `echo KHIOPS_LAST_RUN_DIR %KHIOPS_LAST_RUN_DIR%$\r$\n` - FileWrite $0 `echo KHIOPS_PROC_NUMBER 
%KHIOPS_PROC_NUMBER%$\r$\n` - FileWrite $0 `echo KHIOPS_TMP_DIR %KHIOPS_TMP_DIR%$\r$\n` - FileWrite $0 `echo KHIOPS_MEMORY_LIMIT %KHIOPS_MEMORY_LIMIT%$\r$\n` - FileWrite $0 `echo KHIOPS_API_MODE %KHIOPS_API_MODE%$\r$\n` - FileWrite $0 `echo KHIOPS_RAW_GUI %KHIOPS_RAW_GUI%$\r$\n` - FileWrite $0 `exit /b 0$\r$\n` - - # Close file - FileClose $0 -FunctionEnd diff --git a/packaging/windows/nsis/GetCoresCount.nsh b/packaging/windows/nsis/GetCoresCount.nsh deleted file mode 100644 index 74822c04e..000000000 --- a/packaging/windows/nsis/GetCoresCount.nsh +++ /dev/null @@ -1,54 +0,0 @@ -# Copied entirely from -# http://stackoverflow.com/questions/29911549/cpu-features-getcount-return-incorrect-number-for-cpu-cores -!include LogicLib.nsh -!ifndef ERROR_INSUFFICIENT_BUFFER -!define ERROR_INSUFFICIENT_BUFFER 122 -!endif -!define RelationProcessorCore 0 - -!if "${NSIS_PTR_SIZE}" <= 4 -Function GetProcessorPhysCoreCount -System::Store S -StrCpy $9 0 ; 0 if we fail -System::Call 'kernel32::GetLogicalProcessorInformationEx(i${RelationProcessorCore},i,*i0r2)i.r0?e' -Pop $3 -${If} $3 = ${ERROR_INSUFFICIENT_BUFFER} -${AndIf} $2 <> 0 - System::Alloc $2 - System::Call 'kernel32::GetLogicalProcessorInformationEx(i${RelationProcessorCore},isr1,*ir2r2)i.r0' - Push $1 - ${If} $0 <> 0 - loop_7: - IntOp $9 $9 + 1 - System::Call *$1(i,i.r3) - IntOp $1 $1 + $3 - IntOp $2 $2 - $3 - IntCmp $2 0 "" loop_7 loop_7 - ${EndIf} - Pop $1 - System::Free $1 -${Else} - System::Call 'kernel32::GetLogicalProcessorInformation(i,*i0r2)i.r0?e' - Pop $3 - ${If} $3 = ${ERROR_INSUFFICIENT_BUFFER} - System::Alloc $2 - System::Call 'kernel32::GetLogicalProcessorInformation(isr1,*ir2r2)i.r0' - Push $1 - ${If} $0 <> 0 - loop_v: - System::Call *$1(i,i.r3) - ${If} $3 == ${RelationProcessorCore} - IntOp $9 $9 + 1 - ${EndIf} - IntOp $1 $1 + 24 - IntOp $2 $2 - 24 - IntCmp $2 0 "" loop_v loop_v - ${EndIf} - Pop $1 - System::Free $1 - ${EndIf} -${EndIf} -Push $9 -System::Store L -FunctionEnd -!endif diff --git 
a/packaging/windows/nsis/KhiopsGlobals.nsh b/packaging/windows/nsis/KhiopsGlobals.nsh index 72f5cabbd..c3548bc3a 100644 --- a/packaging/windows/nsis/KhiopsGlobals.nsh +++ b/packaging/windows/nsis/KhiopsGlobals.nsh @@ -1,14 +1,5 @@ # Global Definitions # ------------------ -# Minimal required Java version -!define JavaRequiredVersion "1.8" - -# Minimal required Java update version of the package installer -!define JavaRequiredVersionUpdate "181" - -# Minimal required full Java version (stored in the registry) -!define JavaRequiredFullVersion "1.8.0_181" - # Minimal required MPI version !define MPIRequiredVersion "10.0" diff --git a/packaging/windows/nsis/ReplaceInFile.nsh b/packaging/windows/nsis/ReplaceInFile.nsh new file mode 100644 index 000000000..5e35edc99 --- /dev/null +++ b/packaging/windows/nsis/ReplaceInFile.nsh @@ -0,0 +1,164 @@ +/* +Description: "This function allows you to replace pieces of text in a file. +You have the option of replacing text from the xth times of its occurring, and y times +replacement of the text from that xth occurrence onwards. Just copy the full code underneath in a +text file with notepad, rename it: ReplaceInFile3.nsh and save it in the Include folder of your +NSIS installation folder. You should run your installer.exe file in administrator mode which +grants you authorization for modifying files in directories (Otherwise the code will be overlooked with no warning). + +Copied as it is from from https://nsis.sourceforge.io/More_advanced_replace_text_in_file +" + +/* +First occurrence to be replaced: FST_OCC. + FST_OCC = all, renders the same as FST_OCC = 1. + if FST_OCC greater than the number of occurences in the file: no alteration of the file, + FST_OCC negative or 0 will leave the file content unchanged no matter the NR_OCC value. 
+Nr max of occurrences replaced onwards: NR_OCC, + if NR_OCC = all --> replacement as long as a string to be replaced is found, + if NR_OCC = stritly positive integer, replaces up to NR_OCC occurrences provided they exist, + NR_OCC negative or 0 yields the same as all. + +Order to run down and search the file: from left to right and top down. +REPLACEMENT_STR, OLD_STR, read line should be less than 1024 characters long. +For NSIS Unicode, FILE_TO_MODIFIED must be utf-8 encoded. +*/ + +; Var /Global OLD_STR +; Var /Global FST_OCC +; Var /Global NR_OCC +; Var /Global REPLACEMENT_STR +; Var /Global FILE_TO_MODIFIED + +!macro ReplaceInFile OLD_STR FST_OCC NR_OCC REPLACEMENT_STR FILE_TO_MODIFIED + Push "${OLD_STR}" ;text to be replaced + Push "${REPLACEMENT_STR}" ;replace with + Push "${FST_OCC}" ; starts replacing onwards FST_OCC occurrences + Push "${NR_OCC}" ; replaces NR_OCC occurrences in all + Push "${FILE_TO_MODIFIED}" ; file to replace in + Call ReplaceInFile + +!macroend + + +Function ReplaceInFile +Exch $0 ;FILE_TO_MODIFIED file to replace in +Exch +Exch $1 ;the NR_OCC of OLD_STR occurrences to be replaced. 
+Exch +Exch 2 +Exch $2 ;FST_OCC: the first occurrence to be replaced and onwards +Exch 2 +Exch 3 +Exch $3 ;REPLACEMENT_STR string to replace with +Exch 3 +Exch 4 +Exch $4 ;OLD_STR to be replaced +Exch 4 +Push $5 ;incrementing counter +Push $6 ;a chunk of read line +Push $7 ;the read line altered or not +Push $8 ;left string +Push $9 ;right string or forster read line +Push $R0 ;temp file handle +Push $R1 ;FILE_TO_MODIFIED file handle +Push $R2 ;a line read +Push $R3 ;the length of OLD_STR +Push $R4 ;counts reaching of FST_OCC +Push $R5 ;counts reaching of NR_OCC +Push $R6 ;temp file name + + + GetTempFileName $R6 + + FileOpen $R1 $0 r ;FILE_TO_MODIFIED file to search in + FileOpen $R0 $R6 w ;temp file + StrLen $R3 $4 ;the length of OLD_STR + StrCpy $R4 0 ;counter initialization + StrCpy $R5 -1 ;counter initialization + +loop_read: + ClearErrors + FileRead $R1 $R2 ;reading line + IfErrors exit ;when end of file has been reached + + StrCpy $5 -1 ;cursor, start of read line chunk + StrLen $7 $R2 ;read line length + IntOp $5 $5 - $7 ;cursor initialization + StrCpy $7 $R2 ;$7 contains read line + +loop_filter: + IntOp $5 $5 + 1 ;cursor shifting + StrCmp $5 0 file_write ;end of line has been reached + StrCpy $6 $7 $R3 $5 ;a chunk of read line of length OLD_STR + StrCmp $6 $4 0 loop_filter ;continues to search OLD_STR if no match + +StrCpy $8 $7 $5 ;left part +IntOp $6 $5 + $R3 +IntCmp $6 0 yes no ;left part + OLD_STR == full line read ? 
+yes: +StrCpy $9 "" +Goto done +no: +StrCpy $9 $7 "" $6 ;right part +done: +StrCpy $9 $8$3$9 ;replacing OLD_STR by REPLACEMENT_STR in forster read line + +IntOp $R4 $R4 + 1 ;counter incrementation +;MessageBox MB_OK|MB_ICONINFORMATION \ +;"count R4 = $R4, fst_occ = $2" +StrCmp $2 all follow_up ;exchange ok, then goes to search the next OLD_STR +IntCmp $R4 $2 follow_up ;no exchange until FST_OCC has been reached, +Goto loop_filter ;and then searching for the next OLD_STR + +follow_up: +IntOp $R4 $R4 - 1 ;now counter is to be stuck to FST_OCC + +IntOp $R5 $R5 + 1 ;counter incrementation +;MessageBox MB_OK|MB_ICONINFORMATION \ +;"count R5 = $R5, nbr_occ = $1" +StrCmp $1 all exchange_ok ;goes to exchange OLD_STR with REPLACEMENT_STR +IntCmp $R5 $1 finalize ;proceeding exchange until NR_OCC has been reached + +exchange_ok: +IntOp $5 $5 + $R3 ;updating cursor +StrCpy $7 $9 ;updating read line with forster read line +Goto loop_filter ;goes searching the same read line + +finalize: +IntOp $R5 $R5 - 1 ;now counter is to be stuck to NR_OCC + +file_write: + FileWrite $R0 $7 ;writes altered or unaltered line +Goto loop_read ;reads the next line + +exit: + FileClose $R0 + FileClose $R1 + + ;SetDetailsPrint none + Delete $0 + Rename $R6 $0 ;superseding FILE_TO_MODIFIED file with + ;temp file built with REPLACEMENT_STR + ;Delete $R6 + ;SetDetailsPrint lastused + +Pop $R6 +Pop $R5 +Pop $R4 +Pop $R3 +Pop $R2 +Pop $R1 +Pop $R0 +Pop $9 +Pop $8 +Pop $7 +Pop $6 +Pop $5 +;These values are stored in the stack in the reverse order they were pushed +Pop $0 +Pop $1 +Pop $2 +Pop $3 +Pop $4 +FunctionEnd \ No newline at end of file diff --git a/packaging/windows/nsis/khiops.nsi b/packaging/windows/nsis/khiops.nsi index 661fbf188..28c579d62 100644 --- a/packaging/windows/nsis/khiops.nsi +++ b/packaging/windows/nsis/khiops.nsi @@ -1,787 +1,783 @@ -# Khiops installer builder NSIS script - -# Set Unicode to avoid warning 7998: "ANSI targets are deprecated" -Unicode True - -# Set compresion to LZMA 
(faster) -SetCompressor /SOLID lzma - -# Include NSIS librairies -!include "LogicLib.nsh" -!include "MUI2.nsh" -!include "FileFunc.nsh" -!include "x64.nsh" -!include "winmessages.nsh" - -# Include Custom libraries -!include "KhiopsGlobals.nsh" -!include "GetCoresCount.nsh" -!include "CreateKhiopsEnvCmdFileFunc.nsh" -!include "CreateKhiopsCmdFileFunc.nsh" -!include "KhiopsPrerequisiteFunc.nsh" - -# Definitions for registry change notification -!define SHCNE_ASSOCCHANGED 0x8000000 -!define SHCNF_IDLIST 0 - -# Get installation folder from registry if available -InstallDirRegKey HKLM Software\khiops "" - -# Request admin privileges -RequestExecutionLevel admin - -# Make it aware of HiDPI screens -ManifestDPIAware true - -# Macro to check input parameter definitions -!macro CheckInputParameter ParameterName - !ifndef ${ParameterName} - !error "${ParameterName} is not defined. Use the flag '-D${ParameterName}=...' to define it." - !endif -!macroend - -# Check the mandatory input definitions -!insertmacro CheckInputParameter KHIOPS_VERSION -!insertmacro CheckInputParameter KHIOPS_REDUCED_VERSION -!insertmacro CheckInputParameter KHIOPS_WINDOWS_BUILD_DIR -!insertmacro CheckInputParameter KHIOPS_VIZ_INSTALLER_PATH -!insertmacro CheckInputParameter KHIOPS_COVIZ_INSTALLER_PATH -!insertmacro CheckInputParameter JRE_PATH -!insertmacro CheckInputParameter MSMPI_INSTALLER_PATH -!insertmacro CheckInputParameter MSMPI_VERSION -!insertmacro CheckInputParameter KHIOPS_SAMPLES_DIR -!insertmacro CheckInputParameter KHIOPS_DOC_DIR - -# Application name and installer file name -Name "Khiops ${KHIOPS_VERSION}" -OutFile "khiops-${KHIOPS_VERSION}-setup.exe" - -######################## -# Variable definitions # -######################## - -# MPI installation flag -Var /GLOBAL IsMPIRequired - -# Requirements installation flags -Var /GLOBAL MPIInstallationNeeded - -# Requirements installation messages -Var /GLOBAL MPIInstallationMessage - -# Number of physical cores -Var /GLOBAL 
PhysicalCoresNumber - -# Number of processes to use -Var /GLOBAL ProcessNumber - -# Previous Uninstaller data -Var /GLOBAL PreviousUninstaller -Var /GLOBAL PreviousVersion - -# %Public%, %AllUsersProfile% (%ProgramData%) and samples directory -Var /GLOBAL WinPublicDir -Var /GLOBAL AllUsersProfileDir -Var /GLOBAL GlobalKhiopsDataDir -Var /GLOBAL SamplesInstallDir - -# Root key for the uninstaller in the windows registry -!define UninstallerKey "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall" - -##################################### -# Modern UI Interface Configuration # -##################################### - -# General configuration -!define MUI_HEADERIMAGE -!define MUI_HEADERIMAGE_BITMAP ".\images\headerimage.bmp" -!define MUI_HEADERIMAGE_LEFT -!define MUI_WELCOMEFINISHPAGE_BITMAP ".\images\welcomefinish.bmp" -!define MUI_ABORTWARNING -!define MUI_ICON ".\images\installer.ico" -!define MUI_UNICON "${NSISDIR}\Contrib\Graphics\Icons\win-uninstall.ico" -BrandingText "Orange" - -# Welcome page -!define MUI_WELCOMEPAGE_TITLE "Welcome to the Khiops ${KHIOPS_VERSION} Setup Wizard" -!define MUI_WELCOMEPAGE_TEXT \ - "Khiops is a data mining tool includes data preparation and scoring, visualization, coclustering and covisualization.$\r$\n$\r$\n$\r$\n$\r$\n$(MUI_${MUI_PAGE_UNINSTALLER_PREFIX}TEXT_WELCOME_INFO_TEXT)" -!insertmacro MUI_PAGE_WELCOME - -# Licence page -!insertmacro MUI_PAGE_LICENSE "..\..\..\LICENSE" - -# Custom page for requirements software -Page custom RequirementsPageShow RequirementsPageLeave - -# Install directory choice page -!insertmacro MUI_PAGE_DIRECTORY - -# Install files choice page -!insertmacro MUI_PAGE_INSTFILES - -# Final page -!define MUI_FINISHPAGE_RUN -!define MUI_FINISHPAGE_RUN_TEXT "Create desktop shortcut" -!define MUI_FINISHPAGE_RUN_FUNCTION "CreateDesktopShortcuts" -!define MUI_FINISHPAGE_TEXT "$\r$\n$\r$\nThank you for installing Khiops." 
-!define MUI_FINISHPAGE_LINK "khiops.org" -!define MUI_FINISHPAGE_LINK_LOCATION "https://khiops.org" -!insertmacro MUI_PAGE_FINISH - -# Uninstaller pages -!insertmacro MUI_UNPAGE_CONFIRM -!insertmacro MUI_UNPAGE_INSTFILES - -# Language (must be defined after uninstaller) -!insertmacro MUI_LANGUAGE "English" - -####################### -# Version Information # -####################### - -VIProductVersion "${KHIOPS_REDUCED_VERSION}.0" -VIAddVersionKey /LANG=${LANG_ENGLISH} "ProductName" "Khiops" -VIAddVersionKey /LANG=${LANG_ENGLISH} "CompanyName" "Orange" -VIAddVersionKey /LANG=${LANG_ENGLISH} "LegalCopyright" "Copyright (c) 2024 Orange" -VIAddVersionKey /LANG=${LANG_ENGLISH} "FileDescription" "Khiops Installer" -VIAddVersionKey /LANG=${LANG_ENGLISH} "FileVersion" "${KHIOPS_VERSION}" - -###################### -# Installer Sections # -###################### - -Section "Install" SecInstall - # In order to have shortcuts and documents for all users - SetShellVarContext all - - # Detect Java - Call RequirementsDetection - - - # MPI installation is always required, because Khiops is linked with MPI DLL - ${If} $MPIInstallationNeeded == "1" - Call InstallMPI - ${EndIf} - - # Activate file overwrite - SetOverwrite on - - # Install executables and java libraries - SetOutPath "$INSTDIR\bin" - File "${KHIOPS_WINDOWS_BUILD_DIR}\bin\MODL.exe" - File "${KHIOPS_WINDOWS_BUILD_DIR}\bin\MODL_Coclustering.exe" - File "${KHIOPS_WINDOWS_BUILD_DIR}\jars\norm.jar" - File "${KHIOPS_WINDOWS_BUILD_DIR}\jars\khiops.jar" - - # Install Docs - SetOutPath "$INSTDIR" - File "/oname=LICENSE.txt" "..\..\..\LICENSE" - File "..\..\common\khiops\README.txt" - File "..\..\common\khiops\WHATSNEW.txt" - SetOutPath "$INSTDIR\doc" - File /nonfatal /a /r "${KHIOPS_DOC_DIR}\" - - # Install icons - SetOutPath "$INSTDIR\bin\icons" - File ".\images\installer.ico" - File "..\..\common\images\khiops.ico" - File "..\..\common\images\khiops_coclustering.ico" - - # Set the samples directory to be located either 
within %PUBLIC% or %ALLUSERSPROFILE% as fallback - ReadEnvStr $WinPublicDir PUBLIC - ReadEnvStr $AllUsersProfileDir ALLUSERSPROFILE - ${If} $WinPublicDir != "" - StrCpy $GlobalKhiopsDataDir "$WinPublicDir\khiops_data" - ${ElseIf} $AllUsersProfileDir != "" - StrCpy $GlobalKhiopsDataDir "$AllUsersProfileDir\khiops_data" - ${Else} - StrCpy $GlobalKhiopsDataDir "" - ${EndIf} - - # Debug message - !ifdef DEBUG - ${If} $GlobalKhiopsDataDir == "" - Messagebox MB_OK "Could find PUBLIC nor ALLUSERSPROFILE directories. Samples not installed." - ${Else} - Messagebox MB_OK "Samples will be installed at $GlobalKhiopsDataDir\samples." - ${EndIf} - !endif - - # Install samples only if the directory is defined - ${If} $GlobalKhiopsDataDir != "" - StrCpy $SamplesInstallDir "$GlobalKhiopsDataDir\samples" - SetOutPath "$SamplesInstallDir" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\README.md" - SetOutPath "$SamplesInstallDir\Adult" - File "${KHIOPS_SAMPLES_DIR}\Adult\Adult.kdic" - File "${KHIOPS_SAMPLES_DIR}\Adult\Adult.txt" - SetOutPath "$SamplesInstallDir\Iris" - File "${KHIOPS_SAMPLES_DIR}\Iris\Iris.kdic" - File "${KHIOPS_SAMPLES_DIR}\Iris\Iris.txt" - SetOutPath "$SamplesInstallDir\Mushroom" - File "${KHIOPS_SAMPLES_DIR}\Mushroom\Mushroom.kdic" - File "${KHIOPS_SAMPLES_DIR}\Mushroom\Mushroom.txt" - SetOutPath "$SamplesInstallDir\Letter" - File "${KHIOPS_SAMPLES_DIR}\Letter\Letter.kdic" - File "${KHIOPS_SAMPLES_DIR}\Letter\Letter.txt" - SetOutPath "$SamplesInstallDir\SpliceJunction" - File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunction.kdic" - File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunction.txt" - File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunctionDNA.txt" - SetOutPath "$SamplesInstallDir\Accidents" - File "${KHIOPS_SAMPLES_DIR}\Accidents\Accidents.kdic" - File "${KHIOPS_SAMPLES_DIR}\Accidents\Accidents.txt" - File "${KHIOPS_SAMPLES_DIR}\Accidents\Places.txt" - File "${KHIOPS_SAMPLES_DIR}\Accidents\Users.txt" - File 
"${KHIOPS_SAMPLES_DIR}\Accidents\Vehicles.txt" - File "${KHIOPS_SAMPLES_DIR}\Accidents\train.py" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Accidents\README.md" - SetOutPath "$SamplesInstallDir\Accidents\raw" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\AccidentsPreprocess.kdic" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\Description_BD_ONISR.pdf" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\Licence_Ouverte.pdf" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\caracteristiques-2018.csv" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\lieux-2018.csv" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\usagers-2018.csv" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\vehicules-2018.csv" - File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\preprocess.py" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Accidents\raw\README.md" - SetOutPath "$SamplesInstallDir\AccidentsSummary" - File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Accidents.kdic" - File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Accidents.txt" - File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Vehicles.txt" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\README.md" - SetOutPath "$SamplesInstallDir\Customer" - File "${KHIOPS_SAMPLES_DIR}\Customer\Customer.kdic" - File "${KHIOPS_SAMPLES_DIR}\Customer\CustomerRecoded.kdic" - File "${KHIOPS_SAMPLES_DIR}\Customer\Customer.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\Address.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\Service.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\Usage.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\sort_and_recode_customer.py" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Customer\README.md" - SetOutPath "$SamplesInstallDir\Customer\unsorted" - File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Customer-unsorted.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Address-unsorted.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Service-unsorted.txt" - File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Usage-unsorted.txt" - SetOutPath 
"$SamplesInstallDir\CustomerExtended" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Customer.kdic" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\CustomerRecoded.kdic" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Customer.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Address.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Service.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Usage.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\City.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Country.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Product.txt" - File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\recode_customer.py" - File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\CustomerExtended\README.md" - ${EndIf} - - # Install JRE - SetOutPath $INSTDIR\jre - File /nonfatal /a /r "${JRE_PATH}\" - - # Install Khiops Visualization App - - # Add the installer file - SetOutPath $TEMP - File ${KHIOPS_VIZ_INSTALLER_PATH} - - # Execute Khiops visualization installer: - # - It is not executed with silent mode so the user can customize the install - # - It is executed with "cmd /C" so it opens the installer options window - Var /Global KHIOPS_VIZ_INSTALLER_FILENAME - ${GetFileName} ${KHIOPS_VIZ_INSTALLER_PATH} $KHIOPS_VIZ_INSTALLER_FILENAME - ${If} ${Silent} - nsexec::Exec 'cmd /C "$KHIOPS_VIZ_INSTALLER_FILENAME /S"' - ${Else} - nsexec::Exec 'cmd /C "$KHIOPS_VIZ_INSTALLER_FILENAME"' - ${EndIf} - Pop $0 - DetailPrint "Installation of Khiops visualization: $0" - - # Delete the installer - Delete "$TEMP\KHIOPS_VIZ_INSTALLER_FILENAME" - - - # Execute Khiops covisualization installer: - # Same rules as above with the visualization - - # Files to install in installer directory - File ${KHIOPS_COVIZ_INSTALLER_PATH} - - Var /Global KHIOPS_COVIZ_INSTALLER_FILENAME - ${GetFileName} ${KHIOPS_COVIZ_INSTALLER_PATH} $KHIOPS_COVIZ_INSTALLER_FILENAME - ${If} ${Silent} - nsexec::Exec 'cmd /C "$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME /S"' - ${Else} - nsexec::Exec 'cmd /C 
"$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME"' - ${EndIf} - Pop $0 - DetailPrint "Installation of Khiops covisualization: $0" - - # Delete the installer - Delete "$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME" - - - ############################# - # Finalize the installation # - ############################# - - - # Creation of Khiops cmd files for Khiops et Khiops Coclustering - StrCpy $ProcessNumber $PhysicalCoresNumber - ${If} $PhysicalCoresNumber >= 2 - IntOp $ProcessNumber $PhysicalCoresNumber + 1 - ${EndIf} - ${CreateKhiopsEnvCmdFile} "$INSTDIR\bin\khiops_env.cmd" "$INSTDIR" $ProcessNumber - ${CreateKhiopsCmdFile} "$INSTDIR\bin\khiops.cmd" "MODL" "" "$INSTDIR" "scenario._kh" "log.txt" $IsMPIRequired - ${CreateKhiopsCmdFile} "$INSTDIR\bin\khiops_coclustering.cmd" "MODL_Coclustering" "" "$INSTDIR" "scenario._khc" "logc.txt" "0" - - # Create the Khiops shell - FileOpen $0 "$INSTDIR\bin\shell_khiops.cmd" w - FileWrite $0 '@echo off$\r$\n' - FileWrite $0 'REM Open a shell session with access to Khiops$\r$\n' - FileWrite $0 `if "%KHIOPS_HOME%".=="". 
set KHIOPS_HOME=$INSTDIR$\r$\n` - FileWrite $0 'set path=%KHIOPS_HOME%\bin;%path%$\r$\n' - FileWrite $0 'title Shell Khiops$\r$\n' - FileWrite $0 '%comspec% /K "echo Welcome to Khiops scripting mode & echo Type khiops -h or khiops_coclustering -h to get help' - FileClose $0 - - # Create the uninstaller - WriteUninstaller "$INSTDIR\uninstall-khiops.exe" - - - ##################################### - # Windows environment customization # - # ################################### - - - # Write registry keys to add Khiops in the Add/Remove Programs pane - WriteRegStr HKLM "Software\Khiops" "" $INSTDIR - WriteRegStr HKLM "${UninstallerKey}\Khiops" "UninstallString" '"$INSTDIR\uninstall-khiops.exe"' - WriteRegStr HKLM "${UninstallerKey}\Khiops" "InstallLocation" "$INSTDIR" - WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayName" "Khiops" - WriteRegStr HKLM "${UninstallerKey}\Khiops" "Publisher" "Orange" - WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayIcon" "$INSTDIR\bin\icons\installer.ico" - WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayVersion" "${KHIOPS_VERSION}" - WriteRegStr HKLM "${UninstallerKey}\Khiops" "URLInfoAbout" "http://khiops.org" - WriteRegDWORD HKLM "${UninstallerKey}\Khiops" "NoModify" "1" - WriteRegDWORD HKLM "${UninstallerKey}\Khiops" "NoRepair" "1" - - # Set as the startup dir for all executable shortcuts (yes it is done with SetOutPath!) - ${If} $GlobalKhiopsDataDir != "" - SetOutPath $GlobalKhiopsDataDir - ${Else} - SetOutPath $INSTDIR - ${EndIf} - - # Create application shortcuts in the installation directory - DetailPrint "Installing Start menu Shortcut..." 
- CreateShortCut "$INSTDIR\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED - CreateShortCut "$INSTDIR\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED - ExpandEnvStrings $R0 "%COMSPEC%" - CreateShortCut "$INSTDIR\Shell Khiops.lnk" "$INSTDIR\bin\shell_khiops.cmd" "" "$R0" - - # Create start menu shortcuts for the executables and documentation - DetailPrint "Installing Start menu Shortcut..." - CreateDirectory "$SMPROGRAMS\Khiops" - CreateShortCut "$SMPROGRAMS\Khiops\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED - CreateShortCut "$SMPROGRAMS\Khiops\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED - ExpandEnvStrings $R0 "%COMSPEC%" - CreateShortCut "$SMPROGRAMS\Khiops\Shell Khiops.lnk" "$INSTDIR\bin\shell_khiops.cmd" "" "$R0" - CreateShortCut "$SMPROGRAMS\Khiops\Uninstall.lnk" "$INSTDIR\uninstall-khiops.exe" - CreateDirectory "$SMPROGRAMS\Khiops\doc" - CreateShortCut "$SMPROGRAMS\Khiops\doc\Tutorial.lnk" "$INSTDIR\doc\KhiopsTutorial.pdf" - CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops.lnk" "$INSTDIR\doc\KhiopsGuide.pdf" - CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Coclustering.lnk" "$INSTDIR\doc\KhiopsCoclusteringGuide.pdf" - CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Visualization.lnk" "$INSTDIR\doc\KhiopsVisualizationGuide.pdf" - CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Covisualization.lnk" "$INSTDIR\doc\KhiopsCovisualizationGuide.pdf" - SetOutPath "$INSTDIR" - - # Define aliases for the following registry keys (also used in the uninstaller section) - # - HKLM (all users) - # - HKCU (current user) - !define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' - !define env_hkcu 'HKCU "Environment"' - - # Set KHIOPS_HOME for the local machine and current user - WriteRegExpandStr 
${env_hklm} "KHIOPS_HOME" "$INSTDIR" - WriteRegExpandStr ${env_hkcu} "KHIOPS_HOME" "$INSTDIR" - - # Make sure windows knows about the change - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - # Register file association for Khiops visualisation tools # - # inspired from examples\makensis.nsi - - # Khiops dictionary file extension - ReadRegStr $R0 HKCR ".kdic" "" - ${if} $R0 == "Khiops.Dictionary.File" - DeleteRegKey HKCR "Khiops.Dictionary.File" - ${EndIf} - WriteRegStr HKCR ".kdic" "" "Khiops.Dictionary.File" - WriteRegStr HKCR "Khiops.Dictionary.File" "" "Khiops Dictionary File" - ReadRegStr $R0 HKCR "Khiops.Dictionary.File\shell\open\command" "" - ${If} $R0 == "" - WriteRegStr HKCR "Khiops.Dictionary.File\shell" "" "open" - WriteRegStr HKCR "Khiops.Dictionary.File\shell\open\command" "" 'notepad.exe "%1"' - ${EndIf} - - # Khiops scenario file - ReadRegStr $R0 HKCR "._kh" "" - ${if} $R0 == "Khiops.File" - DeleteRegKey HKCR "Khiops.File" - ${EndIf} - WriteRegStr HKCR "._kh" "" "Khiops.File" - WriteRegStr HKCR "Khiops.File" "" "Khiops File" - WriteRegStr HKCR "Khiops.File\DefaultIcon" "" "$INSTDIR\bin\icons\khiops.ico" - ReadRegStr $R0 HKCR "Khiops.File\shell\open\command" "" - ${If} $R0 == "" - WriteRegStr HKCR "Khiops.File\shell" "" "open" - WriteRegStr HKCR "Khiops.File\shell\open\command" "" 'notepad.exe "%1"' - ${EndIf} - WriteRegStr HKCR "Khiops.File\shell\compile" "" "Execute Khiops Script" - WriteRegStr HKCR "Khiops.File\shell\compile\command" "" '"$INSTDIR\bin\khiops.cmd" -i "%1"' - - # Khiops coclustering scenario file - ReadRegStr $R0 HKCR "._khc" "" - ${if} $R0 == "Khiops.Coclustering.File" - DeleteRegKey HKCR "Khiops.Coclustering.File" - ${EndIf} - WriteRegStr HKCR "._khc" "" "Khiops.Coclustering.File" - WriteRegStr HKCR "Khiops.Coclustering.File" "" "Khiops Coclustering File" - WriteRegStr HKCR "Khiops.Coclustering.File\DefaultIcon" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" - ReadRegStr $R0 HKCR 
"Khiops.Coclustering.File\shell\open\command" "" - ${If} $R0 == "" - WriteRegStr HKCR "Khiops.Coclustering.File\shell" "" "open" - WriteRegStr HKCR "Khiops.Coclustering.File\shell\open\command" "" 'notepad.exe "%1"' - ${EndIf} - WriteRegStr HKCR "Khiops.Coclustering.File\shell\compile" "" "Execute Khiops Coclustering Script" - WriteRegStr HKCR "Khiops.Coclustering.File\shell\compile\command" "" '"$INSTDIR\bin\khiops_coclustering.cmd" -i "%1"' - - # Notify the file extension changes - System::Call 'Shell32::SHChangeNotify(i ${SHCNE_ASSOCCHANGED}, i ${SHCNF_IDLIST}, i 0, i 0)' - - # Debug message - !ifdef DEBUG - Messagebox MB_OK "Installation finished!" - !endif - -SectionEnd - - -############### -# Uninstaller # -############### - -Section "Uninstall" - # In order to have shortcuts and documents for all users - SetShellVarContext all - - # Restore Registry # - # Unregister file associations - DetailPrint "Uninstalling Khiops Shell Extensions..." - - # Unregister Khiops dictionary file extension - ${If} $R0 == "Khiops.Dictionary.File" - DeleteRegKey HKCR ".kdic" - ${EndIf} - DeleteRegKey HKCR "Khiops.Dictionary.File" - - # Unregister Khiops file extension - ${If} $R0 == "Khiops.File" - DeleteRegKey HKCR "._kh" - ${EndIf} - DeleteRegKey HKCR "Khiops.File" - - # Unregister Khiops coclustering file extension - ${If} $R0 == "Khiops.Coclustering.File" - DeleteRegKey HKCR "._khc" - ${EndIf} - DeleteRegKey HKCR "Khiops.Coclustering.File" - - # Notify file extension changes - System::Call 'Shell32::SHChangeNotify(i ${SHCNE_ASSOCCHANGED}, i ${SHCNF_IDLIST}, i 0, i 0)' - - # Delete installation folder key - DeleteRegKey HKLM "${UninstallerKey}\Khiops" - DeleteRegKey HKLM "Software\Khiops" - - # Delete environement variable KHIOPS_HOME - DeleteRegValue ${env_hklm} "KHIOPS_HOME" - DeleteRegValue ${env_hkcu} "KHIOPS_HOME" - - # Delete deprecated environment variable KhiopsHome - DeleteRegValue ${env_hklm} "KhiopsHome" - DeleteRegValue ${env_hkcu} "KhiopsHome" - - # Make sure 
windows knows about the changes in the environment - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - # Delete files # - # Note: Some directories are removed only if they are completely empty (no "/r" RMDir flag) - DetailPrint "Deleting Files ..." - - # Delete docs - Delete "$INSTDIR\LICENSE.txt" - Delete "$INSTDIR\README.txt" - Delete "$INSTDIR\WHATSNEW.txt" - RMDir /r "$INSTDIR\doc" - - # Delete jre - RMDir /r "$INSTDIR\jre" - - # Delete icons - RMDir /r "$INSTDIR\bin\icons" - - # Delete executables and scripts - Delete "$INSTDIR\bin\khiops_env.cmd" - Delete "$INSTDIR\bin\khiops.cmd" - Delete "$INSTDIR\bin\khiops_coclustering.cmd" - Delete "$INSTDIR\bin\MODL.exe" - Delete "$INSTDIR\bin\MODL_Coclustering.exe" - Delete "$INSTDIR\bin\norm.jar" - Delete "$INSTDIR\bin\khiops.jar" - Delete "$INSTDIR\bin\shell_khiops.cmd" - RMDir "$INSTDIR\bin" - - # Delete shortcuts from install dir - Delete "$INSTDIR\Khiops.lnk" - Delete "$INSTDIR\Khiops Coclustering.lnk" - Delete "$INSTDIR\Shell Khiops.lnk" - - # Delete the installer - Delete "$INSTDIR\uninstall-khiops.exe" - - # Remove install directory - RMDir "$INSTDIR" - - # Delete desktop shortcuts - Delete "$DESKTOP\Khiops.lnk" - Delete "$DESKTOP\Khiops Coclustering.lnk" - Delete "$DESKTOP\Shell Khiops.lnk" - - # Delete Start Menu Shortcuts - RMDir /r "$SMPROGRAMS\Khiops" - - # Set the samples directory to be located either within %PUBLIC% or %ALLUSERSPROFILE% as fallback - ReadEnvStr $WinPublicDir PUBLIC - ReadEnvStr $AllUsersProfileDir ALLUSERSPROFILE - ${If} $WinPublicDir != "" - StrCpy $GlobalKhiopsDataDir "$WinPublicDir\khiops_data" - ${ElseIf} $AllUsersProfileDir != "" - StrCpy $GlobalKhiopsDataDir "$AllUsersProfileDir\khiops_data" - ${Else} - StrCpy $GlobalKhiopsDataDir "" - ${EndIf} - - # Delete sample datasets - # We do not remove the whole directory to save the users results from Khiops' analyses - ${If} $GlobalKhiopsDataDir != "" - StrCpy $SamplesInstallDir 
"$GlobalKhiopsDataDir\samples" - Delete "$SamplesInstallDir\AccidentsSummary\Accidents.kdic" - Delete "$SamplesInstallDir\AccidentsSummary\Accidents.txt" - Delete "$SamplesInstallDir\AccidentsSummary\README.txt" - Delete "$SamplesInstallDir\AccidentsSummary\Vehicles.txt" - Delete "$SamplesInstallDir\Accidents\Accidents.kdic" - Delete "$SamplesInstallDir\Accidents\Accidents.txt" - Delete "$SamplesInstallDir\Accidents\Places.txt" - Delete "$SamplesInstallDir\Accidents\README.txt" - Delete "$SamplesInstallDir\Accidents\Users.txt" - Delete "$SamplesInstallDir\Accidents\Vehicles.txt" - Delete "$SamplesInstallDir\Accidents\raw\AccidentsPreprocess.kdic" - Delete "$SamplesInstallDir\Accidents\raw\Description_BD_ONISR.pdf" - Delete "$SamplesInstallDir\Accidents\raw\Licence_Ouverte.pdf" - Delete "$SamplesInstallDir\Accidents\raw\README.txt" - Delete "$SamplesInstallDir\Accidents\raw\caracteristiques-2018.csv" - Delete "$SamplesInstallDir\Accidents\raw\lieux-2018.csv" - Delete "$SamplesInstallDir\Accidents\raw\preprocess.py" - Delete "$SamplesInstallDir\Accidents\raw\usagers-2018.csv" - Delete "$SamplesInstallDir\Accidents\raw\vehicules-2018.csv" - Delete "$SamplesInstallDir\Accidents\train.py" - Delete "$SamplesInstallDir\Adult\Adult.kdic" - Delete "$SamplesInstallDir\Adult\Adult.txt" - Delete "$SamplesInstallDir\CustomerExtended\Address.txt" - Delete "$SamplesInstallDir\CustomerExtended\City.txt" - Delete "$SamplesInstallDir\CustomerExtended\Country.txt" - Delete "$SamplesInstallDir\CustomerExtended\Customer.kdic" - Delete "$SamplesInstallDir\CustomerExtended\Customer.txt" - Delete "$SamplesInstallDir\CustomerExtended\CustomerRecoded.kdic" - Delete "$SamplesInstallDir\CustomerExtended\Product.txt" - Delete "$SamplesInstallDir\CustomerExtended\README.txt" - Delete "$SamplesInstallDir\CustomerExtended\Service.txt" - Delete "$SamplesInstallDir\CustomerExtended\Usage.txt" - Delete "$SamplesInstallDir\CustomerExtended\recode_customer.py" - Delete 
"$SamplesInstallDir\Customer\Address.txt" - Delete "$SamplesInstallDir\Customer\Customer.kdic" - Delete "$SamplesInstallDir\Customer\Customer.txt" - Delete "$SamplesInstallDir\Customer\CustomerRecoded.kdic" - Delete "$SamplesInstallDir\Customer\README.txt" - Delete "$SamplesInstallDir\Customer\Service.txt" - Delete "$SamplesInstallDir\Customer\Usage.txt" - Delete "$SamplesInstallDir\Customer\sort_and_recode_customer.py" - Delete "$SamplesInstallDir\Customer\unsorted\Address-unsorted.txt" - Delete "$SamplesInstallDir\Customer\unsorted\Customer-unsorted.txt" - Delete "$SamplesInstallDir\Customer\unsorted\Service-unsorted.txt" - Delete "$SamplesInstallDir\Customer\unsorted\Usage-unsorted.txt" - Delete "$SamplesInstallDir\Iris\Iris.kdic" - Delete "$SamplesInstallDir\Iris\Iris.txt" - Delete "$SamplesInstallDir\Letter\Letter.kdic" - Delete "$SamplesInstallDir\Letter\Letter.txt" - Delete "$SamplesInstallDir\Mushroom\Mushroom.kdic" - Delete "$SamplesInstallDir\Mushroom\Mushroom.txt" - Delete "$SamplesInstallDir\README.txt" - Delete "$SamplesInstallDir\SpliceJunction\SpliceJunction.kdic" - Delete "$SamplesInstallDir\SpliceJunction\SpliceJunction.txt" - Delete "$SamplesInstallDir\SpliceJunction\SpliceJunctionDNA.txt" - RMDir "$SamplesInstallDir\AccidentsSummary\" - RMDir "$SamplesInstallDir\Accidents\raw\" - RMDir "$SamplesInstallDir\Accidents\" - RMDir "$SamplesInstallDir\Adult\" - RMDir "$SamplesInstallDir\CustomerExtended\" - RMDir "$SamplesInstallDir\Customer\unsorted\" - RMDir "$SamplesInstallDir\Customer\" - RMDir "$SamplesInstallDir\Iris\" - RMDir "$SamplesInstallDir\Letter\" - RMDir "$SamplesInstallDir\Mushroom\" - RMDir "$SamplesInstallDir\SpliceJunction\" - RMDir "$SamplesInstallDir" - ${EndIf} -SectionEnd - - -####################### -# Installer Functions # -####################### - -Function "CreateDesktopShortcuts" - # Set as the startup dir for all executable shortcuts (yes it is done with SetOutPath!) 
- ${If} $GlobalKhiopsDataDir != "" - SetOutPath $GlobalKhiopsDataDir - ${Else} - SetOutPath $INSTDIR - ${EndIf} - - # Create the shortcuts - DetailPrint "Installing Desktop Shortcut..." - CreateShortCut "$DESKTOP\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED - CreateShortCut "$DESKTOP\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED -FunctionEnd - -# Predefined initialization install function -Function .onInit - - # Read location of the uninstaller - ReadRegStr $PreviousUninstaller HKLM "${UninstallerKey}\Khiops" "UninstallString" - ReadRegStr $PreviousVersion HKLM "${UninstallerKey}\Khiops" "DisplayVersion" - - # Ask the user to proceed if there was already a previous Khiops version installed - # In silent mode: remove previous version - ${If} $PreviousUninstaller != "" - MessageBox MB_OKCANCEL|MB_ICONEXCLAMATION \ - "Khiops $PreviousVersion is already installed. $\n$\nClick OK to remove the \ - previous version $\n$\nor Cancel to cancel this upgrade." 
\ - /SD IDOK IDOK uninst - Abort - - # Run the uninstaller - uninst: - ClearErrors - ExecWait '$PreviousUninstaller /S _?=$INSTDIR' - - # Run again the uninstaller to delete the uninstaller itself and the root dir (without waiting) - # Must not be used in silent mode (may delete files from silent following installation) - ${IfNot} ${Silent} - ExecWait '$PreviousUninstaller /S' - ${EndIf} - ${EndIf} - - # Choice of default installation directory, for windows 32 or 64 - ${If} $INSTDIR == "" - ${If} ${RunningX64} - StrCpy $INSTDIR "$PROGRAMFILES64\khiops" - # No 32-bit install - ${EndIf} - ${EndIf} -FunctionEnd - - -# Function to show the page for requirements -Function RequirementsPageShow - # Detect requirements - Call RequirementsDetection - - # Creation of page, with title and subtitle - nsDialogs::Create 1018 - !insertmacro MUI_HEADER_TEXT "Check software requirements" "Check Microsoft MPI" - - # Message to show for the Microsoft MPI installation - ${NSD_CreateLabel} 0 20u 100% 10u $MPIInstallationMessage - - # Show page - nsDialogs::Show -FunctionEnd - - -# Requirements detection -# - Detects if the system architecture is 64-bit -# - Detects whether Java JRE and MPI are installed and their versions -Function RequirementsDetection - # Abort installation if the machine does not have 64-bit architecture - ${IfNot} ${RunningX64} - Messagebox MB_OK "Khiops works only on Windows 64 bits: installation will be terminated." 
/SD IDOK - Quit - ${EndIf} - - # Decide if MPI is required by detecting the number of cores - StrCpy $PhysicalCoresNumber "0" - Call GetProcessorPhysCoreCount - Pop $0 - StrCpy $PhysicalCoresNumber $0 - ${If} $PhysicalCoresNumber > 1 - StrCpy $IsMPIRequired "1" - ${Else} - StrCpy $IsMPIRequired "0" - ${EndIf} - ${If} $IsMPIRequired == "1" - # Note: This call defines MPIInstalledVersion - Call DetectAndLoadMPIEnvironment - ${EndIf} - - # Try to install MPI if it is required - StrCpy $MPIInstallationNeeded "0" - StrCpy $MPIInstallationMessage "" - ${If} $IsMPIRequired == "1" - # If it is not installed install it - ${If} $MPIInstalledVersion == "0" - StrCpy $MPIInstallationMessage "Microsoft MPI version ${MSMPI_VERSION} will be installed" - StrCpy $MPIInstallationNeeded "1" - # Otherwise install only if the required version is newer than the installed one - ${Else} - ${VersionCompare} "${MPIRequiredVersion}" "$MPIInstalledVersion" $0 - ${If} $0 == 1 - StrCpy $MPIInstallationMessage "Microsoft MPI will be upgraded to version ${MSMPI_VERSION}" - StrCpy $MPIInstallationNeeded "1" - ${Else} - StrCpy $MPIInstallationMessage "Microsoft MPI version already installed" - ${EndIf} - ${EndIf} - # Otherwise just inform that MPI is not required - ${Else} - StrCpy $MPIInstallationMessage "Microsoft MPI installation not required" - ${EndIf} - - # Show debug information - !ifdef DEBUG - Messagebox MB_OK "MS-MPI: needed=$MPIInstallationNeeded required=${MPIRequiredVersion} installed=$MPIInstalledVersion" - !endif - -FunctionEnd - -# No leave page for required software -Function RequirementsPageLeave -FunctionEnd +# Khiops installer builder NSIS script + +# Set Unicode to avoid warning 7998: "ANSI targets are deprecated" +Unicode True + +# Set compresion to LZMA (faster) +SetCompressor /SOLID lzma +#SetCompress off + +# Include NSIS librairies +!include "LogicLib.nsh" +!include "MUI2.nsh" +!include "FileFunc.nsh" +!include "x64.nsh" +!include "winmessages.nsh" + +# Include Custom 
libraries +!include "KhiopsGlobals.nsh" +!include "KhiopsPrerequisiteFunc.nsh" +!include "ReplaceInFile.nsh" + + + +# Definitions for registry change notification +!define SHCNE_ASSOCCHANGED 0x8000000 +!define SHCNF_IDLIST 0 + +# Get installation folder from registry if available +InstallDirRegKey HKLM Software\khiops "" + +# Request admin privileges +RequestExecutionLevel admin + +# Make it aware of HiDPI screens +ManifestDPIAware true + +# Macro to check input parameter definitions +!macro CheckInputParameter ParameterName + !ifndef ${ParameterName} + !error "${ParameterName} is not defined. Use the flag '-D${ParameterName}=...' to define it." + !endif +!macroend + +# Check the mandatory input definitions +!insertmacro CheckInputParameter KHIOPS_VERSION +!insertmacro CheckInputParameter KHIOPS_REDUCED_VERSION +!insertmacro CheckInputParameter KHIOPS_WINDOWS_BUILD_DIR +!insertmacro CheckInputParameter KHIOPS_VIZ_INSTALLER_PATH +!insertmacro CheckInputParameter KHIOPS_COVIZ_INSTALLER_PATH +!insertmacro CheckInputParameter JRE_PATH +!insertmacro CheckInputParameter MSMPI_INSTALLER_PATH +!insertmacro CheckInputParameter MSMPI_VERSION +!insertmacro CheckInputParameter KHIOPS_SAMPLES_DIR +!insertmacro CheckInputParameter KHIOPS_DOC_DIR + +# Application name and installer file name +Name "Khiops ${KHIOPS_VERSION}" +OutFile "khiops-${KHIOPS_VERSION}-setup.exe" + +######################## +# Variable definitions # +######################## + +# Requirements installation flags +Var /GLOBAL MPIInstallationNeeded + +# Requirements installation messages +Var /GLOBAL MPIInstallationMessage + +# Previous Uninstaller data +Var /GLOBAL PreviousUninstaller +Var /GLOBAL PreviousVersion + +# %Public%, %AllUsersProfile% (%ProgramData%) and samples directory +Var /GLOBAL WinPublicDir +Var /GLOBAL AllUsersProfileDir +Var /GLOBAL GlobalKhiopsDataDir +Var /GLOBAL SamplesInstallDir + +# Root key for the uninstaller in the windows registry +!define UninstallerKey 
"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall" + +##################################### +# Modern UI Interface Configuration # +##################################### + +# General configuration +!define MUI_HEADERIMAGE +!define MUI_HEADERIMAGE_BITMAP ".\images\headerimage.bmp" +!define MUI_HEADERIMAGE_LEFT +!define MUI_WELCOMEFINISHPAGE_BITMAP ".\images\welcomefinish.bmp" +!define MUI_ABORTWARNING +!define MUI_ICON ".\images\installer.ico" +!define MUI_UNICON "${NSISDIR}\Contrib\Graphics\Icons\win-uninstall.ico" +BrandingText "Orange" + +# Welcome page +!define MUI_WELCOMEPAGE_TITLE "Welcome to the Khiops ${KHIOPS_VERSION} Setup Wizard" +!define MUI_WELCOMEPAGE_TEXT \ + "Khiops is a data mining tool includes data preparation and scoring, visualization, coclustering and covisualization.$\r$\n$\r$\n$\r$\n$\r$\n$(MUI_${MUI_PAGE_UNINSTALLER_PREFIX}TEXT_WELCOME_INFO_TEXT)" +!insertmacro MUI_PAGE_WELCOME + +# Licence page +!insertmacro MUI_PAGE_LICENSE "..\..\..\LICENSE" + +# Custom page for requirements software +Page custom RequirementsPageShow RequirementsPageLeave + +# Install directory choice page +!insertmacro MUI_PAGE_DIRECTORY + +# Install files choice page +!insertmacro MUI_PAGE_INSTFILES + +# Final page +!define MUI_FINISHPAGE_RUN +!define MUI_FINISHPAGE_RUN_TEXT "Create desktop shortcut" +!define MUI_FINISHPAGE_RUN_FUNCTION "CreateDesktopShortcuts" +!define MUI_FINISHPAGE_TEXT "$\r$\n$\r$\nThank you for installing Khiops." 
+!define MUI_FINISHPAGE_LINK "khiops.org" +!define MUI_FINISHPAGE_LINK_LOCATION "https://khiops.org" +!insertmacro MUI_PAGE_FINISH + +# Uninstaller pages +!insertmacro MUI_UNPAGE_CONFIRM +!insertmacro MUI_UNPAGE_INSTFILES + +# Language (must be defined after uninstaller) +!insertmacro MUI_LANGUAGE "English" + +####################### +# Version Information # +####################### + +VIProductVersion "${KHIOPS_REDUCED_VERSION}.0" +VIAddVersionKey /LANG=${LANG_ENGLISH} "ProductName" "Khiops" +VIAddVersionKey /LANG=${LANG_ENGLISH} "CompanyName" "Orange" +VIAddVersionKey /LANG=${LANG_ENGLISH} "LegalCopyright" "Copyright (c) 2024 Orange" +VIAddVersionKey /LANG=${LANG_ENGLISH} "FileDescription" "Khiops Installer" +VIAddVersionKey /LANG=${LANG_ENGLISH} "FileVersion" "${KHIOPS_VERSION}" + +###################### +# Installer Sections # +###################### + +Section "Install" SecInstall + # In order to have shortcuts and documents for all users + SetShellVarContext all + + # Detect Java + Call RequirementsDetection + + + # MPI installation is always required, because Khiops is linked with MPI DLL + ${If} $MPIInstallationNeeded == "1" + Call InstallMPI + ${EndIf} + + # Activate file overwrite + SetOverwrite on + + # Install executables and java libraries + SetOutPath "$INSTDIR\bin" + File "${KHIOPS_WINDOWS_BUILD_DIR}\bin\MODL.exe" + File "${KHIOPS_WINDOWS_BUILD_DIR}\bin\MODL_Coclustering.exe" + File "${KHIOPS_WINDOWS_BUILD_DIR}\bin\_khiopsgetprocnumber.exe" + File "${KHIOPS_WINDOWS_BUILD_DIR}\jars\norm.jar" + File "${KHIOPS_WINDOWS_BUILD_DIR}\jars\khiops.jar" + File "${KHIOPS_WINDOWS_BUILD_DIR}\tmp\khiops_env.cmd" + File "..\khiops.cmd" + File "..\khiops_coclustering.cmd" + + # Install Docs + SetOutPath "$INSTDIR" + File "/oname=LICENSE.txt" "..\..\..\LICENSE" + File "..\..\common\khiops\README.txt" + File "..\..\common\khiops\WHATSNEW.txt" + SetOutPath "$INSTDIR\doc" + File /nonfatal /a /r "${KHIOPS_DOC_DIR}\" + + # Install icons + SetOutPath "$INSTDIR\bin\icons" + 
File ".\images\installer.ico" + File "..\..\common\images\khiops.ico" + File "..\..\common\images\khiops_coclustering.ico" + + # Set the samples directory to be located either within %PUBLIC% or %ALLUSERSPROFILE% as fallback + ReadEnvStr $WinPublicDir PUBLIC + ReadEnvStr $AllUsersProfileDir ALLUSERSPROFILE + ${If} $WinPublicDir != "" + StrCpy $GlobalKhiopsDataDir "$WinPublicDir\khiops_data" + ${ElseIf} $AllUsersProfileDir != "" + StrCpy $GlobalKhiopsDataDir "$AllUsersProfileDir\khiops_data" + ${Else} + StrCpy $GlobalKhiopsDataDir "" + ${EndIf} + + # Debug message + !ifdef DEBUG + ${If} $GlobalKhiopsDataDir == "" + Messagebox MB_OK "Could find PUBLIC nor ALLUSERSPROFILE directories. Samples not installed." + ${Else} + Messagebox MB_OK "Samples will be installed at $GlobalKhiopsDataDir\samples." + ${EndIf} + !endif + + # Install samples only if the directory is defined + ${If} $GlobalKhiopsDataDir != "" + StrCpy $SamplesInstallDir "$GlobalKhiopsDataDir\samples" + SetOutPath "$SamplesInstallDir" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\README.md" + SetOutPath "$SamplesInstallDir\Adult" + File "${KHIOPS_SAMPLES_DIR}\Adult\Adult.kdic" + File "${KHIOPS_SAMPLES_DIR}\Adult\Adult.txt" + SetOutPath "$SamplesInstallDir\Iris" + File "${KHIOPS_SAMPLES_DIR}\Iris\Iris.kdic" + File "${KHIOPS_SAMPLES_DIR}\Iris\Iris.txt" + SetOutPath "$SamplesInstallDir\Mushroom" + File "${KHIOPS_SAMPLES_DIR}\Mushroom\Mushroom.kdic" + File "${KHIOPS_SAMPLES_DIR}\Mushroom\Mushroom.txt" + SetOutPath "$SamplesInstallDir\Letter" + File "${KHIOPS_SAMPLES_DIR}\Letter\Letter.kdic" + File "${KHIOPS_SAMPLES_DIR}\Letter\Letter.txt" + SetOutPath "$SamplesInstallDir\SpliceJunction" + File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunction.kdic" + File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunction.txt" + File "${KHIOPS_SAMPLES_DIR}\SpliceJunction\SpliceJunctionDNA.txt" + SetOutPath "$SamplesInstallDir\Accidents" + File "${KHIOPS_SAMPLES_DIR}\Accidents\Accidents.kdic" + File 
"${KHIOPS_SAMPLES_DIR}\Accidents\Accidents.txt" + File "${KHIOPS_SAMPLES_DIR}\Accidents\Places.txt" + File "${KHIOPS_SAMPLES_DIR}\Accidents\Users.txt" + File "${KHIOPS_SAMPLES_DIR}\Accidents\Vehicles.txt" + File "${KHIOPS_SAMPLES_DIR}\Accidents\train.py" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Accidents\README.md" + SetOutPath "$SamplesInstallDir\Accidents\raw" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\AccidentsPreprocess.kdic" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\Description_BD_ONISR.pdf" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\Licence_Ouverte.pdf" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\caracteristiques-2018.csv" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\lieux-2018.csv" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\usagers-2018.csv" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\vehicules-2018.csv" + File "${KHIOPS_SAMPLES_DIR}\Accidents\raw\preprocess.py" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Accidents\raw\README.md" + SetOutPath "$SamplesInstallDir\AccidentsSummary" + File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Accidents.kdic" + File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Accidents.txt" + File "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\Vehicles.txt" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\AccidentsSummary\README.md" + SetOutPath "$SamplesInstallDir\Customer" + File "${KHIOPS_SAMPLES_DIR}\Customer\Customer.kdic" + File "${KHIOPS_SAMPLES_DIR}\Customer\CustomerRecoded.kdic" + File "${KHIOPS_SAMPLES_DIR}\Customer\Customer.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\Address.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\Service.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\Usage.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\sort_and_recode_customer.py" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\Customer\README.md" + SetOutPath "$SamplesInstallDir\Customer\unsorted" + File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Customer-unsorted.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Address-unsorted.txt" + File 
"${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Service-unsorted.txt" + File "${KHIOPS_SAMPLES_DIR}\Customer\unsorted\Usage-unsorted.txt" + SetOutPath "$SamplesInstallDir\CustomerExtended" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Customer.kdic" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\CustomerRecoded.kdic" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Customer.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Address.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Service.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Usage.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\City.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Country.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\Product.txt" + File "${KHIOPS_SAMPLES_DIR}\CustomerExtended\recode_customer.py" + File "/oname=README.txt" "${KHIOPS_SAMPLES_DIR}\CustomerExtended\README.md" + ${EndIf} + + # Install JRE + SetOutPath $INSTDIR\jre + File /nonfatal /a /r "${JRE_PATH}\" + + # Install Khiops Visualization App + + # Add the installer file + SetOutPath $TEMP + File ${KHIOPS_VIZ_INSTALLER_PATH} + + # Execute Khiops visualization installer: + # - It is not executed with silent mode so the user can customize the install + # - It is executed with "cmd /C" so it opens the installer options window + Var /Global KHIOPS_VIZ_INSTALLER_FILENAME + ${GetFileName} ${KHIOPS_VIZ_INSTALLER_PATH} $KHIOPS_VIZ_INSTALLER_FILENAME + ${If} ${Silent} + nsexec::Exec 'cmd /C "$KHIOPS_VIZ_INSTALLER_FILENAME /S"' + ${Else} + nsexec::Exec 'cmd /C "$KHIOPS_VIZ_INSTALLER_FILENAME"' + ${EndIf} + Pop $0 + DetailPrint "Installation of Khiops visualization: $0" + + # Delete the installer + Delete "$TEMP\$KHIOPS_VIZ_INSTALLER_FILENAME" + + + # Execute Khiops covisualization installer: + # Same rules as above with the visualization + + # Files to install in installer directory + File ${KHIOPS_COVIZ_INSTALLER_PATH} + + Var /Global KHIOPS_COVIZ_INSTALLER_FILENAME + ${GetFileName} ${KHIOPS_COVIZ_INSTALLER_PATH}
$KHIOPS_COVIZ_INSTALLER_FILENAME + ${If} ${Silent} + nsexec::Exec 'cmd /C "$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME /S"' + ${Else} + nsexec::Exec 'cmd /C "$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME"' + ${EndIf} + Pop $0 + DetailPrint "Installation of Khiops covisualization: $0" + + # Delete the installer + Delete "$TEMP\$KHIOPS_COVIZ_INSTALLER_FILENAME" + + + ############################# + # Finalize the installation # + ############################# + + # Setting up the GUI in khiops_env.cmd: replace @GUI_STATUS@ by "true" in the installed file + Push @GUI_STATUS@ + Push 'true' + Push all + Push all + Push $INSTDIR\bin\khiops_env.cmd + Call ReplaceInFile + + # Setting up MPI in khiops_env.cmd: replace @SET_MPI@ by "SET_MPI_SYSTEM_WIDE" in the installed file + Push @SET_MPI@ + Push SET_MPI_SYSTEM_WIDE + Push all + Push all + Push $INSTDIR\bin\khiops_env.cmd + Call ReplaceInFile + + # Setting up IS_CONDA_VAR variable in khiops_env.cmd: replace @IS_CONDA_VAR@ by an empty string: this is not an installer for conda + Push @IS_CONDA_VAR@ + Push "" + Push all + Push all + Push $INSTDIR\bin\khiops_env.cmd + Call ReplaceInFile + + # Create the Khiops shell + FileOpen $0 "$INSTDIR\bin\shell_khiops.cmd" w + FileWrite $0 '@echo off$\r$\n' + FileWrite $0 'REM Open a shell session with access to Khiops$\r$\n' + FileWrite $0 `if "%KHIOPS_HOME%".=="".
set KHIOPS_HOME=$INSTDIR$\r$\n` + FileWrite $0 'set path=%KHIOPS_HOME%\bin;%path%$\r$\n' + FileWrite $0 'title Shell Khiops$\r$\n' + FileWrite $0 '%comspec% /K "echo Welcome to Khiops scripting mode & echo Type khiops -h or khiops_coclustering -h to get help' + FileClose $0 + + # Create the uninstaller + WriteUninstaller "$INSTDIR\uninstall-khiops.exe" + + + ##################################### + # Windows environment customization # + # ################################### + + # Write registry keys to add Khiops in the Add/Remove Programs pane + WriteRegStr HKLM "Software\Khiops" "" $INSTDIR + WriteRegStr HKLM "${UninstallerKey}\Khiops" "UninstallString" '"$INSTDIR\uninstall-khiops.exe"' + WriteRegStr HKLM "${UninstallerKey}\Khiops" "InstallLocation" "$INSTDIR" + WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayName" "Khiops" + WriteRegStr HKLM "${UninstallerKey}\Khiops" "Publisher" "Orange" + WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayIcon" "$INSTDIR\bin\icons\installer.ico" + WriteRegStr HKLM "${UninstallerKey}\Khiops" "DisplayVersion" "${KHIOPS_VERSION}" + WriteRegStr HKLM "${UninstallerKey}\Khiops" "URLInfoAbout" "http://khiops.org" + WriteRegDWORD HKLM "${UninstallerKey}\Khiops" "NoModify" "1" + WriteRegDWORD HKLM "${UninstallerKey}\Khiops" "NoRepair" "1" + + # Set as the startup dir for all executable shortcuts (yes it is done with SetOutPath!) + ${If} $GlobalKhiopsDataDir != "" + SetOutPath $GlobalKhiopsDataDir + ${Else} + SetOutPath $INSTDIR + ${EndIf} + + # Create application shortcuts in the installation directory + DetailPrint "Installing Start menu Shortcut..." 
+ CreateShortCut "$INSTDIR\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED + CreateShortCut "$INSTDIR\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED + ExpandEnvStrings $R0 "%COMSPEC%" + CreateShortCut "$INSTDIR\Shell Khiops.lnk" "$INSTDIR\bin\shell_khiops.cmd" "" "$R0" + + # Create start menu shortcuts for the executables and documentation + DetailPrint "Installing Start menu Shortcut..." + CreateDirectory "$SMPROGRAMS\Khiops" + CreateShortCut "$SMPROGRAMS\Khiops\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED + CreateShortCut "$SMPROGRAMS\Khiops\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED + ExpandEnvStrings $R0 "%COMSPEC%" + CreateShortCut "$SMPROGRAMS\Khiops\Shell Khiops.lnk" "$INSTDIR\bin\shell_khiops.cmd" "" "$R0" + CreateShortCut "$SMPROGRAMS\Khiops\Uninstall.lnk" "$INSTDIR\uninstall-khiops.exe" + CreateDirectory "$SMPROGRAMS\Khiops\doc" + CreateShortCut "$SMPROGRAMS\Khiops\doc\Tutorial.lnk" "$INSTDIR\doc\KhiopsTutorial.pdf" + CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops.lnk" "$INSTDIR\doc\KhiopsGuide.pdf" + CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Coclustering.lnk" "$INSTDIR\doc\KhiopsCoclusteringGuide.pdf" + CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Visualization.lnk" "$INSTDIR\doc\KhiopsVisualizationGuide.pdf" + CreateShortCut "$SMPROGRAMS\Khiops\doc\Khiops Covisualization.lnk" "$INSTDIR\doc\KhiopsCovisualizationGuide.pdf" + SetOutPath "$INSTDIR" + + # Define aliases for the following registry keys (also used in the uninstaller section) + # - HKLM (all users) + # - HKCU (current user) + !define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' + !define env_hkcu 'HKCU "Environment"' + + # Set KHIOPS_HOME for the local machine and current user + WriteRegExpandStr 
${env_hklm} "KHIOPS_HOME" "$INSTDIR" + WriteRegExpandStr ${env_hkcu} "KHIOPS_HOME" "$INSTDIR" + + # Make sure windows knows about the change + SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 + + # Register file association for Khiops visualisation tools # + # inspired from examples\makensis.nsi + + # Khiops dictionary file extension + ReadRegStr $R0 HKCR ".kdic" "" + ${if} $R0 == "Khiops.Dictionary.File" + DeleteRegKey HKCR "Khiops.Dictionary.File" + ${EndIf} + WriteRegStr HKCR ".kdic" "" "Khiops.Dictionary.File" + WriteRegStr HKCR "Khiops.Dictionary.File" "" "Khiops Dictionary File" + ReadRegStr $R0 HKCR "Khiops.Dictionary.File\shell\open\command" "" + ${If} $R0 == "" + WriteRegStr HKCR "Khiops.Dictionary.File\shell" "" "open" + WriteRegStr HKCR "Khiops.Dictionary.File\shell\open\command" "" 'notepad.exe "%1"' + ${EndIf} + + # Khiops scenario file + ReadRegStr $R0 HKCR "._kh" "" + ${if} $R0 == "Khiops.File" + DeleteRegKey HKCR "Khiops.File" + ${EndIf} + WriteRegStr HKCR "._kh" "" "Khiops.File" + WriteRegStr HKCR "Khiops.File" "" "Khiops File" + WriteRegStr HKCR "Khiops.File\DefaultIcon" "" "$INSTDIR\bin\icons\khiops.ico" + ReadRegStr $R0 HKCR "Khiops.File\shell\open\command" "" + ${If} $R0 == "" + WriteRegStr HKCR "Khiops.File\shell" "" "open" + WriteRegStr HKCR "Khiops.File\shell\open\command" "" 'notepad.exe "%1"' + ${EndIf} + WriteRegStr HKCR "Khiops.File\shell\compile" "" "Execute Khiops Script" + WriteRegStr HKCR "Khiops.File\shell\compile\command" "" '"$INSTDIR\bin\khiops.cmd" -i "%1"' + + # Khiops coclustering scenario file + ReadRegStr $R0 HKCR "._khc" "" + ${if} $R0 == "Khiops.Coclustering.File" + DeleteRegKey HKCR "Khiops.Coclustering.File" + ${EndIf} + WriteRegStr HKCR "._khc" "" "Khiops.Coclustering.File" + WriteRegStr HKCR "Khiops.Coclustering.File" "" "Khiops Coclustering File" + WriteRegStr HKCR "Khiops.Coclustering.File\DefaultIcon" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" + ReadRegStr $R0 HKCR 
"Khiops.Coclustering.File\shell\open\command" "" + ${If} $R0 == "" + WriteRegStr HKCR "Khiops.Coclustering.File\shell" "" "open" + WriteRegStr HKCR "Khiops.Coclustering.File\shell\open\command" "" 'notepad.exe "%1"' + ${EndIf} + WriteRegStr HKCR "Khiops.Coclustering.File\shell\compile" "" "Execute Khiops Coclustering Script" + WriteRegStr HKCR "Khiops.Coclustering.File\shell\compile\command" "" '"$INSTDIR\bin\khiops_coclustering.cmd" -i "%1"' + + # Notify the file extension changes + System::Call 'Shell32::SHChangeNotify(i ${SHCNE_ASSOCCHANGED}, i ${SHCNF_IDLIST}, i 0, i 0)' + + # Debug message + !ifdef DEBUG + Messagebox MB_OK "Installation finished!" + !endif + +SectionEnd + + +############### +# Uninstaller # +############### + +Section "Uninstall" + # In order to have shortcuts and documents for all users + SetShellVarContext all + + # Restore Registry # + # Unregister file associations + DetailPrint "Uninstalling Khiops Shell Extensions..." + + # Unregister Khiops dictionary file extension + ${If} $R0 == "Khiops.Dictionary.File" + DeleteRegKey HKCR ".kdic" + ${EndIf} + DeleteRegKey HKCR "Khiops.Dictionary.File" + + # Unregister Khiops file extension + ${If} $R0 == "Khiops.File" + DeleteRegKey HKCR "._kh" + ${EndIf} + DeleteRegKey HKCR "Khiops.File" + + # Unregister Khiops coclustering file extension + ${If} $R0 == "Khiops.Coclustering.File" + DeleteRegKey HKCR "._khc" + ${EndIf} + DeleteRegKey HKCR "Khiops.Coclustering.File" + + # Notify file extension changes + System::Call 'Shell32::SHChangeNotify(i ${SHCNE_ASSOCCHANGED}, i ${SHCNF_IDLIST}, i 0, i 0)' + + # Delete installation folder key + DeleteRegKey HKLM "${UninstallerKey}\Khiops" + DeleteRegKey HKLM "Software\Khiops" + + # Delete environement variable KHIOPS_HOME + DeleteRegValue ${env_hklm} "KHIOPS_HOME" + DeleteRegValue ${env_hkcu} "KHIOPS_HOME" + + # Delete deprecated environment variable KhiopsHome + DeleteRegValue ${env_hklm} "KhiopsHome" + DeleteRegValue ${env_hkcu} "KhiopsHome" + + # Make sure 
windows knows about the changes in the environment + SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 + + # Delete files # + # Note: Some directories are removed only if they are completely empty (no "/r" RMDir flag) + DetailPrint "Deleting Files ..." + + # Delete docs + Delete "$INSTDIR\LICENSE.txt" + Delete "$INSTDIR\README.txt" + Delete "$INSTDIR\WHATSNEW.txt" + RMDir /r "$INSTDIR\doc" + + # Delete jre + RMDir /r "$INSTDIR\jre" + + # Delete icons + RMDir /r "$INSTDIR\bin\icons" + + # Delete executables and scripts + Delete "$INSTDIR\bin\khiops_env.cmd" + Delete "$INSTDIR\bin\khiops.cmd" + Delete "$INSTDIR\bin\khiops_coclustering.cmd" + Delete "$INSTDIR\bin\MODL.exe" + Delete "$INSTDIR\bin\MODL_Coclustering.exe" + Delete "$INSTDIR\bin\_khiopsgetprocnumber.exe" + Delete "$INSTDIR\bin\norm.jar" + Delete "$INSTDIR\bin\khiops.jar" + Delete "$INSTDIR\bin\shell_khiops.cmd" + RMDir "$INSTDIR\bin" + + # Delete shortcuts from install dir + Delete "$INSTDIR\Khiops.lnk" + Delete "$INSTDIR\Khiops Coclustering.lnk" + Delete "$INSTDIR\Shell Khiops.lnk" + + # Delete the installer + Delete "$INSTDIR\uninstall-khiops.exe" + + # Remove install directory + RMDir "$INSTDIR" + + # Delete desktop shortcuts + Delete "$DESKTOP\Khiops.lnk" + Delete "$DESKTOP\Khiops Coclustering.lnk" + Delete "$DESKTOP\Shell Khiops.lnk" + + # Delete Start Menu Shortcuts + RMDir /r "$SMPROGRAMS\Khiops" + + # Set the samples directory to be located either within %PUBLIC% or %ALLUSERSPROFILE% as fallback + ReadEnvStr $WinPublicDir PUBLIC + ReadEnvStr $AllUsersProfileDir ALLUSERSPROFILE + ${If} $WinPublicDir != "" + StrCpy $GlobalKhiopsDataDir "$WinPublicDir\khiops_data" + ${ElseIf} $AllUsersProfileDir != "" + StrCpy $GlobalKhiopsDataDir "$AllUsersProfileDir\khiops_data" + ${Else} + StrCpy $GlobalKhiopsDataDir "" + ${EndIf} + + # Delete sample datasets + # We do not remove the whole directory to save the users results from Khiops' analyses + ${If} $GlobalKhiopsDataDir != "" + 
StrCpy $SamplesInstallDir "$GlobalKhiopsDataDir\samples" + Delete "$SamplesInstallDir\AccidentsSummary\Accidents.kdic" + Delete "$SamplesInstallDir\AccidentsSummary\Accidents.txt" + Delete "$SamplesInstallDir\AccidentsSummary\README.txt" + Delete "$SamplesInstallDir\AccidentsSummary\Vehicles.txt" + Delete "$SamplesInstallDir\Accidents\Accidents.kdic" + Delete "$SamplesInstallDir\Accidents\Accidents.txt" + Delete "$SamplesInstallDir\Accidents\Places.txt" + Delete "$SamplesInstallDir\Accidents\README.txt" + Delete "$SamplesInstallDir\Accidents\Users.txt" + Delete "$SamplesInstallDir\Accidents\Vehicles.txt" + Delete "$SamplesInstallDir\Accidents\raw\AccidentsPreprocess.kdic" + Delete "$SamplesInstallDir\Accidents\raw\Description_BD_ONISR.pdf" + Delete "$SamplesInstallDir\Accidents\raw\Licence_Ouverte.pdf" + Delete "$SamplesInstallDir\Accidents\raw\README.txt" + Delete "$SamplesInstallDir\Accidents\raw\caracteristiques-2018.csv" + Delete "$SamplesInstallDir\Accidents\raw\lieux-2018.csv" + Delete "$SamplesInstallDir\Accidents\raw\preprocess.py" + Delete "$SamplesInstallDir\Accidents\raw\usagers-2018.csv" + Delete "$SamplesInstallDir\Accidents\raw\vehicules-2018.csv" + Delete "$SamplesInstallDir\Accidents\train.py" + Delete "$SamplesInstallDir\Adult\Adult.kdic" + Delete "$SamplesInstallDir\Adult\Adult.txt" + Delete "$SamplesInstallDir\CustomerExtended\Address.txt" + Delete "$SamplesInstallDir\CustomerExtended\City.txt" + Delete "$SamplesInstallDir\CustomerExtended\Country.txt" + Delete "$SamplesInstallDir\CustomerExtended\Customer.kdic" + Delete "$SamplesInstallDir\CustomerExtended\Customer.txt" + Delete "$SamplesInstallDir\CustomerExtended\CustomerRecoded.kdic" + Delete "$SamplesInstallDir\CustomerExtended\Product.txt" + Delete "$SamplesInstallDir\CustomerExtended\README.txt" + Delete "$SamplesInstallDir\CustomerExtended\Service.txt" + Delete "$SamplesInstallDir\CustomerExtended\Usage.txt" + Delete "$SamplesInstallDir\CustomerExtended\recode_customer.py" + Delete 
"$SamplesInstallDir\Customer\Address.txt" + Delete "$SamplesInstallDir\Customer\Customer.kdic" + Delete "$SamplesInstallDir\Customer\Customer.txt" + Delete "$SamplesInstallDir\Customer\CustomerRecoded.kdic" + Delete "$SamplesInstallDir\Customer\README.txt" + Delete "$SamplesInstallDir\Customer\Service.txt" + Delete "$SamplesInstallDir\Customer\Usage.txt" + Delete "$SamplesInstallDir\Customer\sort_and_recode_customer.py" + Delete "$SamplesInstallDir\Customer\unsorted\Address-unsorted.txt" + Delete "$SamplesInstallDir\Customer\unsorted\Customer-unsorted.txt" + Delete "$SamplesInstallDir\Customer\unsorted\Service-unsorted.txt" + Delete "$SamplesInstallDir\Customer\unsorted\Usage-unsorted.txt" + Delete "$SamplesInstallDir\Iris\Iris.kdic" + Delete "$SamplesInstallDir\Iris\Iris.txt" + Delete "$SamplesInstallDir\Letter\Letter.kdic" + Delete "$SamplesInstallDir\Letter\Letter.txt" + Delete "$SamplesInstallDir\Mushroom\Mushroom.kdic" + Delete "$SamplesInstallDir\Mushroom\Mushroom.txt" + Delete "$SamplesInstallDir\README.txt" + Delete "$SamplesInstallDir\SpliceJunction\SpliceJunction.kdic" + Delete "$SamplesInstallDir\SpliceJunction\SpliceJunction.txt" + Delete "$SamplesInstallDir\SpliceJunction\SpliceJunctionDNA.txt" + RMDir "$SamplesInstallDir\AccidentsSummary\" + RMDir "$SamplesInstallDir\Accidents\raw\" + RMDir "$SamplesInstallDir\Accidents\" + RMDir "$SamplesInstallDir\Adult\" + RMDir "$SamplesInstallDir\CustomerExtended\" + RMDir "$SamplesInstallDir\Customer\unsorted\" + RMDir "$SamplesInstallDir\Customer\" + RMDir "$SamplesInstallDir\Iris\" + RMDir "$SamplesInstallDir\Letter\" + RMDir "$SamplesInstallDir\Mushroom\" + RMDir "$SamplesInstallDir\SpliceJunction\" + RMDir "$SamplesInstallDir" + ${EndIf} +SectionEnd + + +####################### +# Installer Functions # +####################### + +Function "CreateDesktopShortcuts" + # Set as the startup dir for all executable shortcuts (yes it is done with SetOutPath!) 
+ ${If} $GlobalKhiopsDataDir != "" + SetOutPath $GlobalKhiopsDataDir + ${Else} + SetOutPath $INSTDIR + ${EndIf} + + # Create the shortcuts + DetailPrint "Installing Desktop Shortcut..." + CreateShortCut "$DESKTOP\Khiops.lnk" "$INSTDIR\bin\khiops.cmd" "" "$INSTDIR\bin\icons\khiops.ico" 0 SW_SHOWMINIMIZED + CreateShortCut "$DESKTOP\Khiops Coclustering.lnk" "$INSTDIR\bin\khiops_coclustering.cmd" "" "$INSTDIR\bin\icons\khiops_coclustering.ico" 0 SW_SHOWMINIMIZED +FunctionEnd + +# Predefined initialization install function +Function .onInit + + # Read location of the uninstaller + ReadRegStr $PreviousUninstaller HKLM "${UninstallerKey}\Khiops" "UninstallString" + ReadRegStr $PreviousVersion HKLM "${UninstallerKey}\Khiops" "DisplayVersion" + + # Ask the user to proceed if there was already a previous Khiops version installed + # In silent mode: remove previous version + ${If} $PreviousUninstaller != "" + MessageBox MB_OKCANCEL|MB_ICONEXCLAMATION \ + "Khiops $PreviousVersion is already installed. $\n$\nClick OK to remove the \ + previous version $\n$\nor Cancel to cancel this upgrade." 
\ + /SD IDOK IDOK uninst + Abort + + # Run the uninstaller + uninst: + ClearErrors + ExecWait '$PreviousUninstaller /S _?=$INSTDIR' + + # Run again the uninstaller to delete the uninstaller itself and the root dir (without waiting) + # Must not be used in silent mode (may delete files from silent following installation) + ${IfNot} ${Silent} + ExecWait '$PreviousUninstaller /S' + ${EndIf} + ${EndIf} + + # Choice of default installation directory, for windows 32 or 64 + ${If} $INSTDIR == "" + ${If} ${RunningX64} + StrCpy $INSTDIR "$PROGRAMFILES64\khiops" + # No 32-bit install + ${EndIf} + ${EndIf} +FunctionEnd + + +# Function to show the page for requirements +Function RequirementsPageShow + # Detect requirements + Call RequirementsDetection + + # Creation of page, with title and subtitle + nsDialogs::Create 1018 + !insertmacro MUI_HEADER_TEXT "Check software requirements" "Check Microsoft MPI" + + # Message to show for the Microsoft MPI installation + ${NSD_CreateLabel} 0 20u 100% 10u $MPIInstallationMessage + + # Show page + nsDialogs::Show +FunctionEnd + + +# Requirements detection +# - Detects if the system architecture is 64-bit +# - Detects whether Java JRE and MPI are installed and their versions +Function RequirementsDetection + # Abort installation if the machine does not have 64-bit architecture + ${IfNot} ${RunningX64} + Messagebox MB_OK "Khiops works only on Windows 64 bits: installation will be terminated." 
/SD IDOK + Quit + ${EndIf} + + # Decide if MPI is required by detecting the number of cores + # Note: This call defines MPIInstalledVersion + Call DetectAndLoadMPIEnvironment + + # Try to install MPI + StrCpy $MPIInstallationNeeded "0" + StrCpy $MPIInstallationMessage "" + + # If it is not installed install it + ${If} $MPIInstalledVersion == "0" + StrCpy $MPIInstallationMessage "Microsoft MPI version ${MSMPI_VERSION} will be installed" + StrCpy $MPIInstallationNeeded "1" + # Otherwise install only if the required version is newer than the installed one + ${Else} + ${VersionCompare} "${MPIRequiredVersion}" "$MPIInstalledVersion" $0 + ${If} $0 == 1 + StrCpy $MPIInstallationMessage "Microsoft MPI will be upgraded to version ${MSMPI_VERSION}" + StrCpy $MPIInstallationNeeded "1" + ${Else} + StrCpy $MPIInstallationMessage "Microsoft MPI version already installed" + ${EndIf} + ${EndIf} + + + # Show debug information + !ifdef DEBUG + Messagebox MB_OK "MS-MPI: needed=$MPIInstallationNeeded required=${MPIRequiredVersion} installed=$MPIInstalledVersion" + !endif + +FunctionEnd + +# No leave page for required software +Function RequirementsPageLeave +FunctionEnd diff --git a/scripts/get_mpi_implementation.cmake b/scripts/get_mpi_implementation.cmake new file mode 100644 index 000000000..533ce92e0 --- /dev/null +++ b/scripts/get_mpi_implementation.cmake @@ -0,0 +1,60 @@ +# Detect MPI implementation. If an implementation is detected, MPI_IMPL is set with "openmpi", "mpich" or "intel". Use +# the global variable IS_CONDA +function(get_mpi_implementation) + # On standard environment, we search implementation names in the MPI_LIBRARIES variable provided by find_mpi. Inside + # conda environment, the library path is the same for all libraries. So we use the 'mpi' variable that is defined + # (only) in the build environment. Outside the build environment, the 'mpi' variable is not defined, we use the list + # of installed packages. 
It works because the mpi packages are in conflict (only one implementation on the same + # environment) + if(IS_CONDA) + if(DEFINED ENV{mpi}) + set(DETECTION_MESSAGE "from conda build environment") + set(VAR_MPI_INFO $ENV{mpi}) + else() + set(DETECTION_MESSAGE "from conda standard environment") + execute_process( + COMMAND conda list + COMMAND grep mpi + OUTPUT_VARIABLE VAR_MPI_INFO) + endif() + else(IS_CONDA) + # Outside conda, we use the path given by find_mpi + set(DETECTION_MESSAGE "from standard environment") + set(VAR_MPI_INFO "${MPI_LIBRARIES}") + endif(IS_CONDA) + + # ERROR if VAR_MPI_INFO is not defined, it means either: - in standard environment find_mpi provides no MPI path (MPI + # is not installed) - or in conda build, the 'mpi' variable is missing and find_mpi may find the system wide mpi, this + # is not what we want. - or in conda, outside of the build process, the mpi package is not installed and find_mpi + # may find the system wide mpi. + if(NOT DEFINED VAR_MPI_INFO OR "${VAR_MPI_INFO}" STREQUAL "") + message(FATAL_ERROR "Missing information to discover the MPI implementation") + endif() + + # Find "openmpi", "mpich" or "intel" in the variable VAR_MPI_INFO + string(FIND "${VAR_MPI_INFO}" openmpi POS) + if(POS GREATER -1) + set(MPI_IMPL "openmpi") + endif() + + string(FIND "${VAR_MPI_INFO}" mpich POS) + if(POS GREATER -1) + set(MPI_IMPL "mpich") + endif() + + string(FIND "${VAR_MPI_INFO}" intel POS) + if(POS GREATER -1) + set(MPI_IMPL "intel") + endif() + + if(MPI_IMPL) + message(STATUS "Auto-detected MPI implementation: ${MPI_IMPL} (${DETECTION_MESSAGE})") + else() + message(STATUS "Unable to detect the MPI implementation: no suffix will be added to binaries name") + endif() + + # Transmits MPI_IMPL to parent scope + set(MPI_IMPL + ${MPI_IMPL} + PARENT_SCOPE) +endfunction() diff --git a/scripts/khiops-package-version b/scripts/khiops-package-version deleted file mode 100755 index eedbe59c4..000000000 --- a/scripts/khiops-package-version +++ 
/dev/null @@ -1,51 +0,0 @@ -#! /usr/bin/env bash -# Creates a "package-version" of Khiops -# - if the commit is tagged: -# - if the matches the in the sources: -# - package-version = -# - otherwise: -# - warn -# - package-version = -preview- -# - otherwise: -# - package-version = -preview- - -# Common safeguards -set -euo pipefail - -# Save this script's directory -SCRIPT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" - -main () { - # The ref_name it should be either a hash or a tag - declare ref_name="$1" - - # Obtain the khiops version from the source - local script_dir - local khiops_version - khiops_version="$("$SCRIPT_DIR/khiops-version")" - - # See the header of the script to understand this part - local khiops_package_version - - # Case of a tag - if git describe --tags --exact-match "$ref_name" 1> /dev/null 2> /dev/null - then - if [[ "$ref_name" == "v${khiops_version}" ]] - then - khiops_package_version="$khiops_version" - else - khiops_package_version="$khiops_version-preview-$ref_name" - echo "::warning: Tag '$ref_name' doesn't match the Khiops source version '$khiops_version'" 1>&2 - echo "::warning: Creating preview package version string '$khiops_package_version'" 1>&2 - fi - # Case of another reference name (branch name or sha) - else - short_hash="$(git rev-parse --short --verify "$ref_name" | cut -d' ' -f1)" - khiops_package_version="$khiops_version-preview-$short_hash" - echo "::warning: Ref name '$ref_name' is not a tag" 1>&2 - echo "::warning: Creating preview package version string '$khiops_package_version'" 1>&2 - fi - echo "$khiops_package_version" -} - -main "$@" diff --git a/scripts/update-copyright.py b/scripts/update-copyright.py index c13b82861..345650c3d 100644 --- a/scripts/update-copyright.py +++ b/scripts/update-copyright.py @@ -1,44 +1,75 @@ +# Copyright (c) 2024 Orange. All rights reserved. 
+# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + """Updates the copyright notice of the input files""" import argparse +import filecmp import os +import tempfile +import sys +import shutil from datetime import datetime # pylint: disable=line-too-long byte_linesep = bytes(os.linesep, encoding="ascii") copyright_banner_lines = [ bytes( - f"// Copyright (c) {datetime.today().year} Orange. All rights reserved.", + f"Copyright (c) {datetime.today().year} Orange. All rights reserved.", encoding="ascii", ), - b"// This software is distributed under the BSD 3-Clause-clear License, the text of which is available", - b'// at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details.', + b"This software is distributed under the BSD 3-Clause-clear License, the text of which is available", + b'at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details.', ] # pylint: enable=line-too-long def main(args): """Main method""" + # Process files and keep track if there were modifications + were_files_modified = False for file_path in args.file_paths: - update_copyright(file_path) + is_file_modified = update_copyright(file_path) + if is_file_modified: + were_files_modified = True + print(f"Updated {file_path}") + + # Set the return code + return_code = 0 + if were_files_modified: + return_code = 1 + + return return_code def update_copyright(file_path): - """Updates the copyright notice of a file""" - print(f"Updating {file_path}") + """Updates the copyright notice of a file if necessary""" + + # Obtain the comment prefix string from the file extension + _, ext = os.path.splitext(file_path) + if ext in (".h", ".c", ".hpp", ".cpp", ".java"): + comment_prefix = b"// " + elif ext == ".py": + comment_prefix = b"# " + else: + raise ValueError(f"Unsupported file extension 
'{ext}'.") # Read the lines from the source file with open(file_path, "rb") as file: lines = file.readlines() - # Then write the file as-is + # Write the contents to a temporary file + # NB: We clean manually the tmp file because the auto-delete creates problems in Win + is_file_modified = False skipped_copyright = False - with open(file_path, "wb") as file: + with tempfile.NamedTemporaryFile(delete=False) as tmp_stream: # Write the current copyright, followed by an empty line for line in copyright_banner_lines: - file.write(line) - file.write(byte_linesep) - file.write(byte_linesep) + tmp_stream.write(comment_prefix) + tmp_stream.write(line) + tmp_stream.write(byte_linesep) + tmp_stream.write(byte_linesep) # Rewrite the file as follows # - Skip the old copyright @@ -47,9 +78,9 @@ def update_copyright(file_path): for n, line in enumerate(lines): line = line.rstrip() if ( - line.startswith(b"// Copyright (c)") - or line.startswith(b"// This software is distributed") - or line.startswith(b"// at https://spdx.org") + line.startswith(comment_prefix + b"Copyright (c)") + or line.startswith(comment_prefix + b"This software is distributed") + or line.startswith(comment_prefix + b"at https://spdx.org") ) and not skipped_copyright: continue else: @@ -61,8 +92,22 @@ def update_copyright(file_path): skipped_copyright = True # Beware: all lines must end with an end of line, including the last line # (otherwise, the Windows RC compiler does not work) - file.write(line) - file.write(byte_linesep) + tmp_stream.write(line) + tmp_stream.write(byte_linesep) + + # Flush the file contents + tmp_stream.flush() + + # If the temporary file contents do not match the original, replace it + tmp_file_path = tmp_stream.name + if not filecmp.cmp(file_path, tmp_file_path, shallow=False): + is_file_modified = True + shutil.copyfile(tmp_stream.name, file_path) + + # Clean the temporary file + os.remove(tmp_file_path) + + return is_file_modified if __name__ == "__main__": @@ -77,4 +122,4 @@ def 
update_copyright(file_path): nargs="+", help="One or more source code files", ) - main(parser.parse_args()) + sys.exit(main(parser.parse_args())) diff --git a/src/Learning/DTForest/DTDecisionTreeCreationTask.cpp b/src/Learning/DTForest/DTDecisionTreeCreationTask.cpp index ed42fb915..4493be1ce 100644 --- a/src/Learning/DTForest/DTDecisionTreeCreationTask.cpp +++ b/src/Learning/DTForest/DTDecisionTreeCreationTask.cpp @@ -1196,10 +1196,6 @@ boolean DTDecisionTreeCreationTask::ComputeResourceRequirements() // initialisation des parametres permettant les diverses estimations de memoire (esclave, maitre...) InitializeMemoryEstimations(); - // nombre de process esclaves : ne pas en demander plus qu'il n'y a d'arbres a creer, car c'est inutile - nMaxSlaveProcessNumber = ComputeMaxSlaveProcessNumber(); - assert(nMaxSlaveProcessNumber > 0); - // Estimation de la memoire partagee lSharedMemory = ComputeSharedNecessaryMemory(); @@ -1210,7 +1206,7 @@ boolean DTDecisionTreeCreationTask::ComputeResourceRequirements() lBiggestTreeMemory = ComputeBiggestTreeNecessaryMemory(); // Mise a jour des demandes de resources - GetResourceRequirements()->SetMaxSlaveProcessNumber(nMaxSlaveProcessNumber); + GetResourceRequirements()->SetMaxSlaveProcessNumber(nMaxCreatedAttributeNumber); GetResourceRequirements()->GetSharedRequirement()->GetMemory()->Set(lSharedMemory); GetResourceRequirements()->GetMasterRequirement()->GetMemory()->Set(lMasterMemory); GetResourceRequirements()->GetSlaveRequirement()->GetMemory()->SetMin(lBiggestTreeMemory); @@ -1260,16 +1256,6 @@ void DTDecisionTreeCreationTask::BuildForestAttributeSelections(DTForestAttribut nMasterForestMaxAttributesSelectionNumber = forestattributeselection->GetMaxAttributesNumber(); } -int DTDecisionTreeCreationTask::ComputeMaxSlaveProcessNumber() const -{ - int result = RMResourceConstraints::GetMaxCoreNumberOnCluster(); - - if (result > nMaxCreatedAttributeNumber) - result = nMaxCreatedAttributeNumber; - - return result; -} - longint 
DTDecisionTreeCreationTask::ComputeMasterNecessaryMemory() { longint lResult; diff --git a/src/Learning/DTForest/DTDecisionTreeCreationTask.h b/src/Learning/DTForest/DTDecisionTreeCreationTask.h index a87f4e4e8..50fb57df7 100644 --- a/src/Learning/DTForest/DTDecisionTreeCreationTask.h +++ b/src/Learning/DTForest/DTDecisionTreeCreationTask.h @@ -162,9 +162,6 @@ class DTDecisionTreeCreationTask : public KDDataPreparationAttributeCreationTask //////////////////////////////////////////////////////////// // Implementation du ComputeResourceRequirements - // Estimation du nombre optimal de processeurs - int ComputeMaxSlaveProcessNumber() const; - /** Estimation de la memoire partagee */ longint ComputeSharedNecessaryMemory(); diff --git a/src/Learning/KWDataUtils/KWDatabaseIndexer.cpp b/src/Learning/KWDataUtils/KWDatabaseIndexer.cpp index 139c4816a..2da0c7552 100644 --- a/src/Learning/KWDataUtils/KWDatabaseIndexer.cpp +++ b/src/Learning/KWDataUtils/KWDatabaseIndexer.cpp @@ -23,6 +23,8 @@ KWDatabaseIndexer::~KWDatabaseIndexer() void KWDatabaseIndexer::InitializeFromDatabase(const KWDatabase* database) { KWClass* kwcMainClass; + RMTaskResourceGrant defaultGrantedResources; + RMTaskResourceRequirement defaultRequirements; require(database == NULL or database->Check()); @@ -70,7 +72,11 @@ void KWDatabaseIndexer::InitializeFromDatabase(const KWDatabase* database) } // Memorisation du nombre d'esclaves utilisables au moment de l'indexation - nResourceSlaveNumber = RMResourceConstraints::GetMaxCoreNumberOnCluster(); + // Pour cela, les ressources obtenues pour des exigences par defaut fournissent le nombre total d'esclaves disponibles + // (sur un cluster le nombre d'esclave est different du nombre de processus -1) + + RMParallelResourceManager::ComputeGrantedResources(&defaultRequirements, &defaultGrantedResources); + nResourceSlaveNumber = defaultGrantedResources.GetSlaveNumber(); } boolean KWDatabaseIndexer::IsInitialized() const diff --git 
a/src/Learning/KWLearningProblem/KWLearningProject.cpp b/src/Learning/KWLearningProblem/KWLearningProject.cpp index 552beeac7..691b9c17d 100644 --- a/src/Learning/KWLearningProblem/KWLearningProject.cpp +++ b/src/Learning/KWLearningProblem/KWLearningProject.cpp @@ -503,6 +503,7 @@ boolean KWLearningProject::ShowSystemInformation(const ALString& sValue) svEnvironmentVariables.Add("KHIOPS_HOME"); svEnvironmentVariables.Add("KHIOPS_API_MODE"); svEnvironmentVariables.Add("KHIOPS_MEMORY_LIMIT"); + svEnvironmentVariables.Add("KHIOPS_DRIVERS_PATH"); svEnvironmentVariables.Sort(); bEnvVarDefined = false; cout << "Environment variables:" << endl; diff --git a/src/Learning/KWLearningProblem/KWSystemParametersView.cpp b/src/Learning/KWLearningProblem/KWSystemParametersView.cpp index 2ee0eec82..dee975583 100644 --- a/src/Learning/KWLearningProblem/KWSystemParametersView.cpp +++ b/src/Learning/KWLearningProblem/KWSystemParametersView.cpp @@ -133,13 +133,12 @@ KWSystemParametersView::KWSystemParametersView() cast(UIIntElement*, GetFieldAt("MaxCoreNumber"))->SetMinValue(1); nMaxProcNumber = - min(RMResourceManager::GetPhysicalCoreNumber(), max(1, RMResourceManager::GetLogicalProcessNumber() - 1)); + min(RMResourceManager::GetPhysicalCoreNumber(), max(1, RMResourceManager::GetLogicalProcessNumber())); cast(UIIntElement*, GetFieldAt("MaxCoreNumber"))->SetMaxValue(nMaxProcNumber); cast(UIIntElement*, GetFieldAt("MaxCoreNumber"))->SetDefaultValue(nMaxProcNumber); - // Calcul du nombre effectif de processus qu'on utilise (en general DefaultValue +1) - RMResourceConstraints::SetMaxCoreNumberOnCluster( - ComputeCoreNumber(cast(UIIntElement*, GetFieldAt("MaxCoreNumber"))->GetDefaultValue())); + // Mise a jour de la contrainte du nombre de processus utilisable sur le systeme + RMResourceConstraints::SetMaxCoreNumberOnCluster(nMaxProcNumber); // On ne peut pas editer le nombre de process a utiliser le mode parallel n'est pas disponible if (not PLParallelTask::IsParallelModeAvailable()) @@ 
-200,9 +199,6 @@ KWSystemParametersView::~KWSystemParametersView() {} void KWSystemParametersView::EventUpdate(Object* object) { - int nRequestedCore; - ALString sTestFunctionality; - // On parametre directement les variables statiques correspondantes // en ignorant l'objet passe en parametres KWLearningSpec::SetMaxModalityNumber(GetIntValueAt("MaxItemNumberInReports")); @@ -212,8 +208,7 @@ void KWSystemParametersView::EventUpdate(Object* object) RMResourceConstraints::SetIgnoreMemoryLimit(GetBooleanValueAt("IgnoreMemoryLimit")); // Calcul du nombre de processus utilises - nRequestedCore = GetIntValueAt("MaxCoreNumber"); - RMResourceConstraints::SetMaxCoreNumberOnCluster(ComputeCoreNumber(nRequestedCore)); + RMResourceConstraints::SetMaxCoreNumberOnCluster(GetIntValueAt("MaxCoreNumber")); PLParallelTask::SetParallelSimulated(GetBooleanValueAt("ParallelSimulated")); PLParallelTask::SetParallelLogFileName(GetStringValueAt("ParallelLogFileName")); FileService::SetUserTmpDir(GetStringValueAt("TemporaryDirectoryName")); @@ -228,7 +223,7 @@ void KWSystemParametersView::EventRefresh(Object* object) SetIntValueAt("OptimizationTime", RMResourceConstraints::GetOptimizationTime()); SetIntValueAt("MemoryLimit", RMResourceConstraints::GetMemoryLimit()); SetBooleanValueAt("IgnoreMemoryLimit", RMResourceConstraints::GetIgnoreMemoryLimit()); - SetIntValueAt("MaxCoreNumber", ComputeRequestedCoreNumber(RMResourceConstraints::GetMaxCoreNumberOnCluster())); + SetIntValueAt("MaxCoreNumber", RMResourceConstraints::GetMaxCoreNumberOnCluster()); SetBooleanValueAt("ParallelSimulated", PLParallelTask::GetParallelSimulated()); SetStringValueAt("ParallelLogFileName", PLParallelTask::GetParallelLogFileName()); SetStringValueAt("TemporaryDirectoryName", FileService::GetUserTmpDir()); @@ -238,43 +233,3 @@ const ALString KWSystemParametersView::GetClassLabel() const { return "System parameters"; } - -int KWSystemParametersView::ComputeCoreNumber(int nRequestedCoreNumber) const -{ - int 
nMPIProcessNumber; - int nProcessNumber; - - require(nRequestedCoreNumber > 0); - - nMPIProcessNumber = RMResourceManager::GetLogicalProcessNumber(); - - // Evaluation du nombre de processus a lancer - if (nRequestedCoreNumber == 1) - { - // Lancement en sequentiel si l'utilisateur ne demande qu'un seul coeur - nProcessNumber = 1; - } - else - { - // En parallele, on utilise 1 processus de plus que ce que demande l'utilisateur. - // Et on n'utilise pas plus de processus qu'il n'y a de processus MPI (d'ou le min) - // Il est donc recommande de lancer Khiops avec 1 processus MPI de plus que le nombre - // de coeurs physiques - nProcessNumber = min(nRequestedCoreNumber + 1, nMPIProcessNumber); - } - ensure(nProcessNumber > 0); - return nProcessNumber; -} - -int KWSystemParametersView::ComputeRequestedCoreNumber(int nCoreNumber) const -{ - int nRequestedCore; - - require(nCoreNumber > 0); - - if (nCoreNumber == 1) - nRequestedCore = 1; - else - nRequestedCore = nCoreNumber - 1; - return nRequestedCore; -} diff --git a/src/Learning/KWLearningProblem/KWSystemParametersView.h b/src/Learning/KWLearningProblem/KWSystemParametersView.h index 8a60e3ef4..69659a411 100644 --- a/src/Learning/KWLearningProblem/KWSystemParametersView.h +++ b/src/Learning/KWLearningProblem/KWSystemParametersView.h @@ -39,17 +39,4 @@ class KWSystemParametersView : public UIObjectView // Libelles utilisateur const ALString GetClassLabel() const override; - - //////////////////////////////////////////////////////// - //// Implementation -protected: - // Calcul du nombre de processus utilise a partir de ce que l'utilisateur indique dans l'IHM - // Si nRequestedCoreNumber == 1 , c'est du sequentiel, on n'utilise qu'un seul processus - // Sinon on utilise un processus de plus que ce qui est demande sauf si il n'y a pas assez de - // processus MPI lances - int ComputeCoreNumber(int nRequestedCoreNumber) const; - - // Pendant de la methode ComputeCoreNumber, renvoie le nombre de coeurs affiche a l'IHM a 
partir - // du nombre de processus utilises - int ComputeRequestedCoreNumber(int nCoreNumber) const; }; diff --git a/src/Learning/KWUtils/KWKhiopsVersion.h b/src/Learning/KWUtils/KWKhiopsVersion.h index 477f7f47d..a3782c80e 100644 --- a/src/Learning/KWUtils/KWKhiopsVersion.h +++ b/src/Learning/KWUtils/KWKhiopsVersion.h @@ -10,7 +10,7 @@ // dans le TaskManager de Windows (par exemple) // Version de Khiops -#define KHIOPS_VERSION KHIOPS_STR(10.2.2) +#define KHIOPS_VERSION KHIOPS_STR(10.2.3) // Les versions release distribuees sont bases sur trois numeros, par exemple KHIOPS_STR(10.2.0) // Les versions alpha, beta ou release candidate ont un suffixe supplementaire, par exemple : // - KHIOPS_STR(10.5.0-a.1) diff --git a/src/Learning/KhiopsNativeInterface/CMakeLists.txt b/src/Learning/KhiopsNativeInterface/CMakeLists.txt index 7f9783e66..1d6abce7f 100644 --- a/src/Learning/KhiopsNativeInterface/CMakeLists.txt +++ b/src/Learning/KhiopsNativeInterface/CMakeLists.txt @@ -6,6 +6,7 @@ set_target_properties( KhiopsNativeInterface PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/KhiopsNativeInterface.h SOVERSION ${PROJECT_VERSION_MAJOR} - VERSION ${KHIOPS_VERSION}) + VERSION ${KHIOPS_VERSION} + WINDOWS_EXPORT_ALL_SYMBOLS true) set_khiops_options(KhiopsNativeInterface) diff --git a/src/Learning/SNBPredictor/SNBPredictorSelectiveNaiveBayesTrainingTask.cpp b/src/Learning/SNBPredictor/SNBPredictorSelectiveNaiveBayesTrainingTask.cpp index a5d141d5b..e34e6e42a 100644 --- a/src/Learning/SNBPredictor/SNBPredictorSelectiveNaiveBayesTrainingTask.cpp +++ b/src/Learning/SNBPredictor/SNBPredictorSelectiveNaiveBayesTrainingTask.cpp @@ -793,12 +793,18 @@ boolean SNBPredictorSNBTrainingTask::MasterInitializeDataTableBinarySliceSet() nDataTableSliceSetSliceNumber = 0; recoderClass = NULL; - // Recherche d'un nombre de slices qui permet d'executer la tache avec le minimum pour les buffer du - // KWDataTableSliceSet + // Recherche d'un nombre de slices qui permet d'executer la tache + // 
avec le minimum pour les buffers du KWDataTableSliceSet for (nSliceNumber = 1; nSliceNumber <= nMaxSliceNumber; nSliceNumber++) { + // Calcul de la memoire necessaire pour l'esclave pour ce nombre de slices + // NB: La memoire globale diminue avec la taille du buffer du slice set d'entree (2eme param). + // Donc on utilise BufferedFile::nDefaultBufferSize, qui est la plus grand taille utilisee + // lors de l'estimation de resources. Ceci assure que l'on atteint la borne-inf des + // ressources demandes. lSlaveNecessaryMemory = - ComputeGlobalSlaveNecessaryMemory(nSliceNumber, MemSegmentByteSize) / nSlaveProcessNumber + + ComputeGlobalSlaveNecessaryMemory(nSliceNumber, BufferedFile::nDefaultBufferSize) / + nSlaveProcessNumber + ComputeSlaveNecessaryMemory(nSlaveProcessNumber, nSliceNumber) + ComputeSharedNecessaryMemory(MemSegmentByteSize); if (lGrantedSlaveMemory >= lSlaveNecessaryMemory) @@ -1258,9 +1264,6 @@ boolean SNBPredictorSNBTrainingTask::CheckCurrentAttribute() const boolean SNBPredictorSNBTrainingTask::MasterFinalize(boolean bProcessEndedCorrectly) { - boolean bOk; - ALString sTmp; - require(masterSnbPredictor != NULL); require(masterInitialDatabase != NULL); require(masterInitialDatabase->Check()); @@ -1292,13 +1295,7 @@ boolean SNBPredictorSNBTrainingTask::MasterFinalize(boolean bProcessEndedCorrect // En parallele : Nettoyage du fichier dictionnaire auxilier if (IsParallel()) - { - bOk = FileService::RemoveFile( - FileService::GetURIFilePathName(shared_sRecoderClassDomainFileURI.GetValue())); - if (not bOk) - AddWarning(sTmp + "Failed to remove temporary dictionary " + - shared_sRecoderClassDomainFileURI.GetValue()); - } + FileService::RemoveFile(FileService::GetURIFilePathName(shared_sRecoderClassDomainFileURI.GetValue())); ensure(shared_learningSpec.GetLearningSpec()->Check()); ensure(masterSnbPredictor->GetClassStats() != NULL); diff --git a/src/Norm/CMakeLists.txt b/src/Norm/CMakeLists.txt index 580b6c667..d6f4a0b94 100644 --- 
a/src/Norm/CMakeLists.txt +++ b/src/Norm/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory(genere) add_subdirectory(basetest) add_subdirectory(generetest) +add_subdirectory(_khiopsgetprocnumber) # Add norm.jar (CMake names the target norm_jar) if(BUILD_JARS) diff --git a/src/Norm/_khiopsgetprocnumber/CMakeLists.txt b/src/Norm/_khiopsgetprocnumber/CMakeLists.txt new file mode 100644 index 000000000..7e09c3228 --- /dev/null +++ b/src/Norm/_khiopsgetprocnumber/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB cppfiles ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) + +add_executable(_khiopsgetprocnumber ${cppfiles} _khiopsgetprocnumber.rc) +set_khiops_options(_khiopsgetprocnumber) +target_link_libraries(_khiopsgetprocnumber PUBLIC base) diff --git a/src/Norm/_khiopsgetprocnumber/GetProcNumber.cpp b/src/Norm/_khiopsgetprocnumber/GetProcNumber.cpp new file mode 100644 index 000000000..f5c783735 --- /dev/null +++ b/src/Norm/_khiopsgetprocnumber/GetProcNumber.cpp @@ -0,0 +1,20 @@ +// Copyright (c) 2024 Orange. All rights reserved. +// This software is distributed under the BSD 3-Clause-clear License, the text of which is available +// at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +#include "SystemResource.h" +#include "Standard.h" + +int main(int argc, char** argv) +{ + if (argc != 1) + { + cout << "This program is used by Khiops. Displays the number of physical cores that are available." + << endl; + return EXIT_FAILURE; + } + + // Display proc number on stdout + cout << SystemGetProcessorNumber() << endl; + return EXIT_SUCCESS; +} diff --git a/src/Norm/_khiopsgetprocnumber/Version.h b/src/Norm/_khiopsgetprocnumber/Version.h new file mode 100644 index 000000000..ab74d154a --- /dev/null +++ b/src/Norm/_khiopsgetprocnumber/Version.h @@ -0,0 +1,21 @@ +// Copyright (c) 2024 Orange. All rights reserved. 
+// This software is distributed under the BSD 3-Clause-clear License, the text of which is available +// at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +#pragma once +#define GPN_STR(s) #s + +// Constantes definies a la fois dans les sources et dans les ressources Visual Studio. +// Les ressources permettent d'afficher des informations sur les executables +// dans le TaskManager de Windows (par exemple) + +// Version de Khiops +#define GPN_VERSION GPN_STR(1.0.0) +// Les versions release distribuees sont bases sur trois numeros, par exemple GPN_STR(10.2.0) +// Les versions alpha, beta ou release candidate ont un suffixe supplementaire, par exemple : +// - GPN_STR(10.5.0-a.1) +// - GPN_STR(10.5.0-b.3) +// - GPN_STR(10.5.0-rc.2) + +// Copyright +#define GPN_COPYRIGHT_LABEL GPN_STR((c)2024 Orange - All rights reserved.) diff --git a/src/Norm/_khiopsgetprocnumber/_khiopsgetprocnumber.rc b/src/Norm/_khiopsgetprocnumber/_khiopsgetprocnumber.rc new file mode 100644 index 000000000..dc3f2b7b6 --- /dev/null +++ b/src/Norm/_khiopsgetprocnumber/_khiopsgetprocnumber.rc @@ -0,0 +1,114 @@ +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. 
+// +#include "winres.h" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// Anglais (États-Unis) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""winres.h""\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + +#endif // Anglais (États-Unis) resources +///////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////// +// Français (France) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_FRA) +LANGUAGE LANG_FRENCH, SUBLANG_FRENCH + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 0,0,0,0 + PRODUCTVERSION 0,0,0,0 + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x1L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040c04b0" + BEGIN + VALUE "CompanyName", "Orange" + VALUE "FileDescription","Khiops Get Proc Number Utility" + VALUE "FileVersion", GPN_VERSION + VALUE "InternalName", "_khiopsgetprocnumber.exe" + VALUE "LegalCopyright", GPN_COPYRIGHT_LABEL + VALUE "OriginalFilename", "_khiopsgetprocnumber.exe" + VALUE "ProductName", "Khiops Get Proc Number" + VALUE "ProductVersion", GPN_VERSION + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x40c, 1200 + END +END + + +///////////////////////////////////////////////////////////////////////////// +// +// String Table +// + + +#endif // Français (France) resources 
+///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED \ No newline at end of file diff --git a/src/Norm/_khiopsgetprocnumber/resource.h b/src/Norm/_khiopsgetprocnumber/resource.h new file mode 100644 index 000000000..dec4c59f6 --- /dev/null +++ b/src/Norm/_khiopsgetprocnumber/resource.h @@ -0,0 +1,22 @@ +// Copyright (c) 2024 Orange. All rights reserved. +// This software is distributed under the BSD 3-Clause-clear License, the text of which is available +// at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +// Fichier genere automatiquement pour l'affichage des ressources +// Ne pas modifier +#ifdef _WIN32 +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. 
+// Used by khisto.rc +#include "Version.h" +// Valeurs par defaut suivantes des nouveaux objets +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1001 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif +#endif diff --git a/src/Norm/base/SystemFile.cpp b/src/Norm/base/SystemFile.cpp index 78ff5b364..412368017 100644 --- a/src/Norm/base/SystemFile.cpp +++ b/src/Norm/base/SystemFile.cpp @@ -196,6 +196,116 @@ boolean SystemFile::CloseOutputFile(const ALString& sFilePathName) return bOk; } +longint SystemFile::Read(void* pBuffer, size_t size, size_t count) +{ + longint lRes; + ALString sTmp; + require(fileDriver != NULL); + require(fileHandle != NULL); + require(bIsOpenForRead); + + // Mode de test : toujours en echec + if (bAlwaysErrorOnRead) + { + errno = ECANCELED; + return 0; + } + + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fread Begin"); + lRes = fileDriver->Fread(pBuffer, size, count, fileHandle); + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fread End"); + + // Renvoie 0 en cas d'erreur + if (lRes == -1) + lRes = 0; + return lRes; +} + +boolean SystemFile::SeekPositionInFile(longint lPosition) +{ + boolean bRes; + ALString sTmp; + + require(fileDriver != NULL); + require(fileHandle != NULL); + require(bIsOpenForRead); + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fseek Begin"); + + bRes = fileDriver->SeekPositionInFile(lPosition, fileHandle); + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fseek End"); + + return bRes; +} + +longint SystemFile::Write(const void* pBuffer, size_t size, size_t count) +{ + longint lRes; + ALString sTmp; + + require(fileDriver != NULL); + 
require(fileHandle != NULL); + require(bIsOpenForWrite); + + // Mode de test : toujours en echec + if (bAlwaysErrorOnFlush) + { + errno = ECANCELED; + return 0; + } + + // Mise a jour des informations sur la reserve + // Ce n'est pas la peine de remettre la reserve a zero si elle est negative, + // car toute nouvelle reserve ecrasera necessairement l'etat actuel + lReservedExtraSize -= count * size; + lRequestedExtraSize -= count * size; + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fwrite Begin"); + + lRes = fileDriver->Fwrite(pBuffer, size, count, fileHandle); + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fwrite End"); + + return lRes; +} + +boolean SystemFile::Flush() +{ + boolean bRes; + ALString sTmp; + + require(fileDriver != NULL); + require(fileHandle != NULL); + require(bIsOpenForWrite); + + // Mode de test : toujours en echec + if (bAlwaysErrorOnFlush) + { + errno = ECANCELED; + return false; + } + + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] flush Begin"); + bRes = fileDriver->Flush(fileHandle); + if (FileService::LogIOStats()) + MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] flush End"); + return bRes; +} + +ALString SystemFile::GetLastErrorMessage() +{ + // Le driver peut etre null dans le cas ou on ne peut pas ouvrir le fichier + if (fileDriver == NULL) + { + return "file driver is missing"; + } + return fileDriver->GetLastErrorMessage(); +} + longint SystemFile::GetFileSize(const ALString& sFilePathName) { longint lFileSize = 0; diff --git a/src/Norm/base/SystemFile.h b/src/Norm/base/SystemFile.h index 0d7b391e0..adb661c7f 100644 --- a/src/Norm/base/SystemFile.h +++ b/src/Norm/base/SystemFile.h @@ -115,109 +115,3 @@ class SystemFile : public Object static boolean bAlwaysErrorOnRead; static boolean 
bAlwaysErrorOnFlush; }; - -inline longint SystemFile::Read(void* pBuffer, size_t size, size_t count) -{ - longint lRes; - ALString sTmp; - require(fileDriver != NULL); - require(fileHandle != NULL); - require(bIsOpenForRead); - - // Mode de test : toujours en echec - if (bAlwaysErrorOnRead) - { - errno = ECANCELED; - return 0; - } - - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fread Begin"); - lRes = fileDriver->Fread(pBuffer, size, count, fileHandle); - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fread End"); - return lRes; -} - -inline boolean SystemFile::SeekPositionInFile(longint lPosition) -{ - boolean bRes; - ALString sTmp; - - require(fileDriver != NULL); - require(fileHandle != NULL); - require(bIsOpenForRead); - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fseek Begin"); - - bRes = fileDriver->SeekPositionInFile(lPosition, fileHandle); - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fseek End"); - - return bRes; -} - -inline longint SystemFile::Write(const void* pBuffer, size_t size, size_t count) -{ - longint lRes; - ALString sTmp; - - require(fileDriver != NULL); - require(fileHandle != NULL); - require(bIsOpenForWrite); - - // Mode de test : toujours en echec - if (bAlwaysErrorOnFlush) - { - errno = ECANCELED; - return 0; - } - - // Mise a jour des informations sur la reserve - // Ce n'est pas la peine de remettre la reserve a zero si elle est negative, - // car toute nouvelle reserve ecrasera necessairement l'etat actuel - lReservedExtraSize -= count * size; - lRequestedExtraSize -= count * size; - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fwrite Begin"); - - lRes = fileDriver->Fwrite(pBuffer, size, count, 
fileHandle); - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] fwrite End"); - - return lRes; -} - -inline boolean SystemFile::Flush() -{ - boolean bRes; - ALString sTmp; - - require(fileDriver != NULL); - require(fileHandle != NULL); - require(bIsOpenForWrite); - - // Mode de test : toujours en echec - if (bAlwaysErrorOnFlush) - { - errno = ECANCELED; - return false; - } - - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] flush Begin"); - bRes = fileDriver->Flush(fileHandle); - if (FileService::LogIOStats()) - MemoryStatsManager::AddLog(sTmp + "driver [" + fileDriver->GetDriverName() + "] flush End"); - return bRes; -} - -inline ALString SystemFile::GetLastErrorMessage() -{ - // Le driver peut etre null dans le cas ou on ne peut pas ouvrir le fichier - if (fileDriver == NULL) - { - return "file driver is missing"; - } - return fileDriver->GetLastErrorMessage(); -} diff --git a/src/Norm/base/SystemFileDriverANSI.cpp b/src/Norm/base/SystemFileDriverANSI.cpp index 4b1f589da..620d43c56 100644 --- a/src/Norm/base/SystemFileDriverANSI.cpp +++ b/src/Norm/base/SystemFileDriverANSI.cpp @@ -121,7 +121,7 @@ longint SystemFileDriverANSI::Fread(void* ptr, size_t size, size_t count, void* // Lecture dans le fichier lRes = std::fread(ptr, size, count, (FILE*)stream); - if (lRes != longint(count) and ferror((FILE*)stream)) + if (ferror((FILE*)stream)) lRes = -1; return lRes; } @@ -142,7 +142,7 @@ longint SystemFileDriverANSI::Fwrite(const void* ptr, size_t size, size_t count, // Ecriture dans le fichier lWrite = std::fwrite(ptr, size, count, (FILE*)stream); - if (lWrite != longint(count) and ferror((FILE*)stream)) + if (ferror((FILE*)stream)) lWrite = 0; return lWrite; } diff --git a/src/Norm/base/SystemFileDriverCreator.cpp b/src/Norm/base/SystemFileDriverCreator.cpp index dc5ef3282..72cf283b0 100644 --- a/src/Norm/base/SystemFileDriverCreator.cpp +++ 
b/src/Norm/base/SystemFileDriverCreator.cpp @@ -21,7 +21,7 @@ int SystemFileDriverCreator::RegisterExternalDrivers() SystemFileDriverLibrary* driverLibrary; ALString sLibraryScheme; SystemFileDriver* registeredDriver; - boolean bOk; + boolean bOk = true; int i; int nDriver; ALString sTmp; @@ -30,17 +30,46 @@ int SystemFileDriverCreator::RegisterExternalDrivers() if (oaSystemFileDriver == NULL) oaSystemFileDriver = new ObjectArray; + nExternalDriverNumber = 0; + + // On cherche les drivers dans le chemin renseigne par la variable d'environement KHIOPS_DRIVERS_PATH + // Si elle n'est pas renseigne, on cherche dans les chemins par defaut. + + // Teste si KHIOPS_DRIVERS_PATH est renseignee avec une valeur correcte + sLibraryPath = p_getenv("KHIOPS_DRIVERS_PATH"); + if (sLibraryPath != "") + { + if (!FileService::DirExists(sLibraryPath)) + { + Global::AddError( + "", "", "Drivers location directory missing (KHIOPS_DRIVERS_PATH=" + sLibraryPath + ")"); + bOk = false; + } + } + // Si KHIOPS_DRIVERS_PATH n'est pas renseignee, on cherche dans les valeurs par defaut + if (bOk and sLibraryPath == "") + { #ifdef _WIN32 - sLibraryPath = p_getenv("KHIOPS_HOME"); - sLibraryPath += "\\bin"; + sLibraryPath = p_getenv("KHIOPS_HOME"); + sLibraryPath += "\\bin"; #elif defined __linux__ - sLibraryPath = "/usr/lib/"; + sLibraryPath = "/usr/lib/"; #elif defined __APPLE__ // TODO: a verifier - sLibraryPath = "/usr/lib/"; + sLibraryPath = "/usr/lib/"; #endif - bOk = FileService::GetDirectoryContentExtended(sLibraryPath, &svDirectoryNames, &svFileNames); - nExternalDriverNumber = 0; + } + + if (bOk) + { + bOk = FileService::GetDirectoryContentExtended(sLibraryPath, &svDirectoryNames, &svFileNames); + // On ne teste pas le retour de la methode car ca revient a tester si KHIOPS_HOME\bin existe et si Khiops est installe, il existe. 
+ // En revanche ca pose probleme si on lance MODL en standalone (avec khy_test ou directement en ligne de commande, sans passer par khiops_env) + // car KHIOPS_HOME n'est pas defini et on va chercher dans le repertoire \lib qui n'existe pas. Mais c'est un cas particulier + // dans lequel on ne veut pas charger les drivers (et donc on ne veut pas d'erreur) + // Sur Linux il n'y a pas de probleme potentiel : /usr/lib existe toujours + } + if (bOk) { // Parcours du repertoire et chargement de tous les fichiers qui ont un nom de la forme diff --git a/src/Norm/base/SystemResource.cpp b/src/Norm/base/SystemResource.cpp index 27834d78c..d2f542607 100644 --- a/src/Norm/base/SystemResource.cpp +++ b/src/Norm/base/SystemResource.cpp @@ -3,7 +3,6 @@ // at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. #include "SystemResource.h" -#include "Portability.h" #include "MemoryManager.h" // Pour eviter les warning sur strcpy et sprintf diff --git a/test/LearningTest/MTdatasets/SpliceJunction/transcodeSpliceJunction.py b/test/LearningTest/MTdatasets/SpliceJunction/transcodeSpliceJunction.py index 6255ff48d..9ef4ac00a 100644 --- a/test/LearningTest/MTdatasets/SpliceJunction/transcodeSpliceJunction.py +++ b/test/LearningTest/MTdatasets/SpliceJunction/transcodeSpliceJunction.py @@ -1,37 +1,41 @@ -import os - - -def Transcode(outputFileName, outputDataFileName): - # ouverture des fichiers - # fInput = open("splice.data", 'r') - # avec les donnees sans doublons - fInput = open("splice.data.cleaned", "r") - fOutput = open(outputFileName, "w") - fOutputData = open(outputDataFileName, "w") - - # ligne d'entete - fOutput.write("SampleId\tClass\n") - fOutputData.write("SampleId\tPos\tChar\n") - - # parcours de repertoires - lines = fInput.readlines() - for line in lines: - line = line.replace("\n", "") - line = line.replace(" ", "") - fields = line.split(",") - Class = fields[0] - sampleId = fields[1] - DNA = fields[2] - fOutput.write(sampleId + 
"\t" + Class + "\n") - index = 1 - for char in DNA: - fOutputData.write(sampleId + "\t" + str(index) + "\t" + char + "\n") - index = index + 1 - - # fermeture des fichiers - fInput.close() - fOutput.close() - fOutputData.close() - - -Transcode("SpliceJunction.txt", "SpliceJunctionDNA.txt") +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +import os + + +def Transcode(outputFileName, outputDataFileName): + # ouverture des fichiers + # fInput = open("splice.data", 'r') + # avec les donnees sans doublons + fInput = open("splice.data.cleaned", "r") + fOutput = open(outputFileName, "w") + fOutputData = open(outputDataFileName, "w") + + # ligne d'entete + fOutput.write("SampleId\tClass\n") + fOutputData.write("SampleId\tPos\tChar\n") + + # parcours de repertoires + lines = fInput.readlines() + for line in lines: + line = line.replace("\n", "") + line = line.replace(" ", "") + fields = line.split(",") + Class = fields[0] + sampleId = fields[1] + DNA = fields[2] + fOutput.write(sampleId + "\t" + Class + "\n") + index = 1 + for char in DNA: + fOutputData.write(sampleId + "\t" + str(index) + "\t" + char + "\n") + index = index + 1 + + # fermeture des fichiers + fInput.close() + fOutput.close() + fOutputData.close() + + +Transcode("SpliceJunction.txt", "SpliceJunctionDNA.txt") diff --git a/test/LearningTestTool/py/_kht_check_results.py b/test/LearningTestTool/py/_kht_check_results.py index 746b6a2c4..a27c394f6 100644 --- a/test/LearningTestTool/py/_kht_check_results.py +++ b/test/LearningTestTool/py/_kht_check_results.py @@ -1,1847 +1,1851 @@ -import os.path -import re - -import _kht_constants as kht -import _kht_utils as utils -import _kht_results_management as results - -""" -Verification des resultats d'un repertoire de test terminal - -La comparaison est effectue entre 
les resultats de test, et les resultats de reference -correspondant au contexte en cours (plateforme, parallel ou sequuentiel...). -Elle se fait sur tous les fichiers du repertoire de facon hierarchique -- nombre de fichiers de chaque repertoire -- noms des fichiers -- pour chaque fichier - - nombre de lignes - - contenu - - comparaison des lignes - - si necessaire, comparaison des champs des lignes, pour un separateur tabulation - - si necessaire, comparaison des tokens du champ, - dans le cas de la tokenisation d'un fichier json ou kdic - -La comparaison se fait en etant tolerant aux variations 'normales' selon le contexte d'execution -- il peut y avoir des resultats de reference different selon le contexte -- on filtre prealablement certaines informations non presentes systematiquement - - copyright - - prefix de type '[0] ' lie au process, genere par mpiexec en parallele - - statistique sur la memoire ne mode debug - ... -- il y a une tolerance sur les valeur numeriques, ce qui entraine alors des warning et non des erreurs -- ... - -En cas d'erreurs residuelles, plusieurs strategies de recouvrement des erreurs sont utilises, -a differents moments du processus de comparaison -- tolerance sur echec de scenario, si cela correspond au resultats de reference -- tolerance aux noms de fichier utilisant des caracteres accentues systeme dependant -- tolerance sur les messages d'erreurs differents en parallele et en sequentiel -- tolerance sur les message d'erreur lies au manque de ressource -... 
-""" - -# Nom du fichier de comparaison -COMPARISON_LOG_FILE_NAME = kht.COMPARISON_RESULTS_LOG - -# Constantes de la section SUMMARY des fichiers de log des resultats de comparaison -SUMMARY_TITLE = "SUMMARY" -SUMMARY_WARNING_KEY = "warning(s)" -SUMMARY_ERROR_KEY = "error(s)" -SUMMARY_FILE_TYPES_KEY = "Problem file types: " -SUMMARY_NOTE_KEY = "Note: " -SUMMARY_PORTABILITY_KEY = "Portability: " - -# Constantes pour la gestion des fichiers speciaux, par priorite decroissante -SUMMARY_TIMEOUT_ERROR_KEY = "TIMEOUT ERROR" -SUMMARY_FATAL_ERROR_KEY = "FATAL ERROR" -SUMMARY_UNEXPECTED_OUTPUT_KEY = "UNEXPECTED OUTPUT" -SUMMARY_SPECIAL_FILE_KEYS = [ - SUMMARY_TIMEOUT_ERROR_KEY, - SUMMARY_FATAL_ERROR_KEY, - SUMMARY_UNEXPECTED_OUTPUT_KEY, -] - -# Association entre type de fichier special et cle de gestion dans le resume -SUMMARY_SPECIAL_FILE_KEYS_PER_FILE = { - kht.STDOUT_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, - kht.STDERR_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, - kht.PROCESS_TIMEOUT_ERROR_LOG: SUMMARY_TIMEOUT_ERROR_KEY, - kht.RETURN_CODE_ERROR_LOG: SUMMARY_FATAL_ERROR_KEY, -} -assert len(SUMMARY_SPECIAL_FILE_KEYS_PER_FILE) == len(kht.SPECIAL_ERROR_FILES) - -# Ensemble des cle pouvant se trouver dans le resume -ALL_SUMMARY_KEYS = [ - SUMMARY_WARNING_KEY, - SUMMARY_ERROR_KEY, - SUMMARY_FILE_TYPES_KEY, - SUMMARY_PORTABILITY_KEY, -] + SUMMARY_SPECIAL_FILE_KEYS -assert len(set(ALL_SUMMARY_KEYS)) == len(ALL_SUMMARY_KEYS), ( - "Summary keys " + str(ALL_SUMMARY_KEYS) + " must not contain duplicates" -) - - -def analyse_comparison_log(test_dir): - """ - Analyse du log de comparaison des resultats de test et de reference - present dans un repertoire de test - Renvoie: - - error_number - Le nombre d'erreurs deduit du resume - - warning_number - Le nombre de warnings deduit du resume - - summary_infos: - Un dictionnaire par avec une ligne de texte par cle de resume (ALL_SUMMARY_KEYS) - - files_infos: - Un dictionaire par nom de fichier contenant le resultat de la comparaison - pour 
ce fichier, sous la forme d'un texte potentiellement multi-lignes - Ce texte contient 'OK' uniquement si aucun problme n'est detecte - Il contient des lignes de texte, dont certain sont potentiellement prefixes par 'warning: ' - ou 'error : ' sinon - Si le log de comparaison n'est pas disponible ou exploitable, on retourne une erreur - """ - - def extract_number(message): - assert message != "" - fields = message.split() - assert fields[0].isdigit() - number = int(fields[0]) - return number - - utils.check_test_dir(test_dir) - - # Initialisation des resultats - error_number = 0 - warning_number = 0 - summary_infos = {} - files_infos = {} - - # Traitement des erreurs memorisee dans le log - log_file_path = os.path.join(test_dir, kht.COMPARISON_RESULTS_LOG) - if not os.path.isfile(log_file_path): - # Erreur speciale si pas de fichier de comparaison - error_number = 1 - summary_infos[SUMMARY_NOTE_KEY] = "The test has not been launched" - else: - try: - with open(log_file_path, "r", errors="ignore") as log_file: - lines = log_file.readlines() - except Exception as exception: - # Erreur speciale si probleme de lecture du fichier de comparaison - lines = None - error_number = 1 - summary_infos[SUMMARY_NOTE_KEY] = ( - "Unable to read file " + kht.COMPARISON_RESULTS_LOG + str(exception) - ) - # Analyse du contenu du fichier - file_pattern = "file " - if lines is not None: - index = 0 - while index < len(lines): - line = lines[index] - index += 1 - line = line.strip() - - # Analyse des lignes concernant chaque fichier avant le resume - if line.find(file_pattern) == 0: - file_path = line[len(file_pattern) :] - file_name = os.path.basename(file_path) - file_info = "" - while index < len(lines): - line = lines[index] - index += 1 - line = line.strip() - if line == "": - break - else: - if file_info != "": - file_info += "\n" - file_info += line - files_infos[file_name] = file_info - continue - - # Analyse du resume jsuq'u la fin du fichier si debut de resume trouve - if line 
== SUMMARY_TITLE: - while index < len(lines): - line = lines[index] - index += 1 - line = line.strip() - for key in ALL_SUMMARY_KEYS: - if line.find(key) >= 0: - summary_infos[key] = line - if key == SUMMARY_WARNING_KEY: - warning_number = extract_number(line) - elif key == SUMMARY_ERROR_KEY: - error_number = extract_number(line) - - # Erreur speciale si le resume n'est pas trouve - if len(summary_infos) == 0: - assert error_number == 0 - error_number = 1 - specific_message = ( - "Section '" - + SUMMARY_TITLE - + "' not found in " - + kht.COMPARISON_RESULTS_LOG - ) - summary_infos[SUMMARY_NOTE_KEY] = specific_message - # Retour des resultats - return error_number, warning_number, summary_infos, files_infos - - -def check_results(test_dir, forced_context=None): - """ - Fonction principale de comparaison des resultats de test et de reference - Les fichiers sont compares 2 a 2 et la synthese de la comparaison est ecrite - dans un fichier de log, avec un resume en fin de fichier, facile a parser - On retourne True s'il n'y a aucune erreur - - Le parametrage d'un contexte force en entree permete d'effectuer la comparaison avec - un contexte (parallel|sequential, platform) alternatif. 
Dans ce cas: - - l'objectif est essentiellement de renvoyer un indicateur global de succes de la comparaison - - on n'ecrit pas de fichier de comparaison - """ - utils.check_test_dir(test_dir) - - # Initialisation des stats de comparaison - special_error_file_error_numbers = {} - for file_name in kht.SPECIAL_ERROR_FILES: - special_error_file_error_numbers[file_name] = 0 - error_number = 0 - warning_number = 0 - user_message_warning_number = 0 - compared_files_number = 0 - error_number_in_err_txt = 0 - error_number_per_extension = {} - error_number_per_file = {} - erroneous_ref_file_lines = {} - erroneous_test_file_lines = {} - erroneous_file_names = [] - extension_message = "" - specific_message = "" - portability_message = "" - recovery_message = "" - - # Ouverture du fichier de log de comparaison, sauf si lle contexte est force - log_file = None - if forced_context is None: - log_file_path = os.path.join(test_dir, COMPARISON_LOG_FILE_NAME) - try: - log_file = open(log_file_path, "w", errors="ignore") - except Exception as exception: - print("error : unable to create log file " + log_file_path, exception) - return - assert log_file is not None - utils.write_message( - utils.test_dir_name(test_dir) + " comparison", log_file=log_file - ) - - # Information sur le contexte courant de comparaison des resultats - if forced_context is None: - current_context = results.get_current_results_ref_context() - utils.write_message( - "current comparison context : " + str(current_context), - log_file=log_file, - ) - else: - current_context = forced_context - - # Test de presence du repertoire de test a comparer - results_dir = os.path.join(test_dir, kht.RESULTS) - if not os.path.isdir(results_dir): - utils.write_message( - "error : no comparison, test directory not available (" + results_dir + ")", - log_file=log_file, - show=True, - ) - error_number = error_number + 1 - - # Recherche du repertoire courant des resultats de reference - results_ref, candidate_dirs = 
results.get_results_ref_dir( - test_dir, forced_context=forced_context, log_file=log_file, show=True - ) - if results_ref is None: - utils.write_message( - "error : invalid " - + kht.RESULTS_REF - + " dirs " - + utils.list_to_label(candidate_dirs), - log_file=log_file, - show=True, - ) - error_number = error_number + 1 - elif len(candidate_dirs) >= 2: - portability_message = ( - "used " + results_ref + " dir among " + utils.list_to_label(candidate_dirs) - ) - utils.write_message( - portability_message, - log_file=log_file, - show=True, - ) - - # Test de presence du repertoire de reference a comparer - results_ref_dir = "" - if error_number == 0: - results_ref_dir = os.path.join(test_dir, results_ref) - if not os.path.isdir(results_ref_dir): - utils.write_message( - "error : no comparison, reference directory not available (" - + results_ref_dir - + ")", - log_file=log_file, - show=True, - ) - error_number = error_number + 1 - - # Comparaison effective si possible - if error_number == 0: - # Acces aux fichiers des repertoires de reference et de test - # On passe par le format bytes des noms de fichier pour avoir acces - # aux fichier quelque soit la plateforme - # - Windows ne supporte que l'utf8 - # - Linux stocke les noms directement sous la forme de bytes - ref_byte_file_names = os.listdir(os.fsencode(results_ref_dir)) - test_byte_file_names = os.listdir(os.fsencode(results_dir)) - - # On memorise les noms de fichiers sous forme de string pour faciliter le reporting - # Tout en gardant l'association entre le nom python (utf8) et les noms en bytes - # - # Attention, la methode fsdecode utilise des 'surrogate characters' invisible - # permettant de garder trace des bytes non utf8 pour le re-encodage par fsencode si necessaire - # On passe par une version 'nettoyee' purement ascii de ces caracteres speciaux pour memoriser - # l'association entre un nom de fichier de type string et un nom de type bytes - # Dans ce cas, il suffit de memoriser dans les resultats de 
reference la - # version du nom de fichier sans bytes (valide quelque soit la plateforme) - # Pour les resultats de test, le nom peut comporter des bytes, mais on tolere - # la comparaison si sa version nettoyee est la meme que pour le fichier de reference - ref_file_names = [] - dic_ref_byte_file_names = {} - recovery = False - for byte_file_name in ref_byte_file_names: - file_name = os.fsdecode(byte_file_name) - cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") - if cleaned_file_name != file_name: - utils.write_message( - "warning : reference file name with a byte encoding (" - + str(byte_file_name) - + ") used under ascii name (" - + cleaned_file_name - + ")", - log_file=log_file, - ) - warning_number += 1 - recovery = True - ref_file_names.append(cleaned_file_name) - dic_ref_byte_file_names[cleaned_file_name] = byte_file_name - # Idem pour les resultat de test - test_file_names = [] - dic_test_byte_file_names = {} - for byte_file_name in test_byte_file_names: - file_name = os.fsdecode(byte_file_name) - cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") - if cleaned_file_name != file_name: - utils.write_message( - "warning : test file name with a byte encoding (" - + str(byte_file_name) - + ") used under ascii name (" - + cleaned_file_name - + ")", - log_file=log_file, - ) - warning_number += 1 - recovery = True - test_file_names.append(cleaned_file_name) - dic_test_byte_file_names[cleaned_file_name] = byte_file_name - - # Message de recuperation d'erreur si necessaire - if recovery: - utils.write_message( - "\nRecovery from errors caused by byte encoding of file names in another platform", - log_file=log_file, - ) - recovery_message = utils.append_message( - recovery_message, "Recovery of type byte encoding of file names" - ) - - # On tri par nom de fichier pour ameliorer la stabilite du reporting inter plateformes - ref_file_names.sort() - test_file_names.sort() - - # Comparaison des nombres de fichiers - 
ref_result_file_number = len(ref_file_names) - test_result_file_number = len(test_file_names) - if ref_result_file_number == 0: - utils.write_message( - "error : no comparison, missing reference result files", - log_file=log_file, - show=True, - ) - error_number = error_number + 1 - elif ref_result_file_number != test_result_file_number: - utils.write_message( - "\nerror : number of results files (" - + str(test_result_file_number) - + ") should be " - + str(ref_result_file_number), - log_file=log_file, - show=True, - ) - error_number = error_number + 1 - # Affichage des noms des fichiers supplementaires - max_file_reported = 20 - if test_result_file_number > ref_result_file_number: - # Message specifique en cas de fichiers en trop - specific_message = utils.append_message( - specific_message, "additional result files" - ) - utils.write_message( - "Additional files in " + kht.RESULTS + " dir:", log_file=log_file - ) - file_reported = 0 - for file_name in test_file_names: - if file_name not in ref_file_names: - if file_reported < max_file_reported: - utils.write_message("\t" + file_name, log_file=log_file) - else: - utils.write_message("\t...", log_file=log_file) - break - file_reported += 1 - elif test_result_file_number < ref_result_file_number: - # Message specifique en cas de fichiers manquants - specific_message = utils.append_message( - specific_message, "missing result files" - ) - utils.write_message( - "Missing files in " + kht.RESULTS + " dir:", log_file=log_file - ) - file_reported = 0 - for file_name in ref_file_names: - if file_name not in test_file_names: - if file_reported < max_file_reported: - utils.write_message("\t" + file_name, log_file=log_file) - else: - utils.write_message("\t...", log_file=log_file) - break - file_reported += 1 - - # Comparaison des fichiers 2 a 2 en memorisant les erreurs par extension - for file_name in ref_file_names: - compared_files_number = compared_files_number + 1 - - # Path des fichiers utilises pour le reporting - 
ref_file_path = os.path.join(results_ref_dir, file_name) - test_file_path = os.path.join(results_dir, file_name) - - # En-tete de comparaison des fichiers - utils.write_message("\nfile " + test_file_path, log_file=log_file) - - # On utilise si possible le path des fichiers en bytes pour s'adapter aux contraintes de la plateforme - # Les erreurs seront diagnostiquees si necessaire lors de la lecture des fichiers - used_ref_file_path = ref_file_path - if dic_ref_byte_file_names.get(file_name) is not None: - used_ref_file_path = os.path.join( - os.fsencode(results_ref_dir), dic_ref_byte_file_names.get(file_name) - ) - used_test_file_path = test_file_path - if dic_test_byte_file_names.get(file_name) is not None: - used_test_file_path = os.path.join( - os.fsencode(results_dir), dic_test_byte_file_names.get(file_name) - ) - - # Lecture des fichiers - ref_file_lines = utils.read_file_lines( - used_ref_file_path, log_file=log_file - ) - test_file_lines = utils.read_file_lines( - used_test_file_path, log_file=log_file - ) - if ref_file_lines is None: - error_number = error_number + 1 - if test_file_lines is None: - error_number = error_number + 1 - - # Comparaison si ok - if ref_file_lines is not None and test_file_lines is not None: - # Cas des fichiers stdout et stderr, que l'on filtre du prefix de process id presnet en parallele - if file_name in [kht.STDOUT_ERROR_LOG, kht.STDERR_ERROR_LOG]: - ref_file_lines = utils.filter_process_id_prefix_from_lines( - ref_file_lines - ) - test_file_lines = utils.filter_process_id_prefix_from_lines( - test_file_lines - ) - - # Mise en forme specifique des messages utilisateurs (error, warning) pour les traiter - # de facon identique dans les cas des fichiers de log utilisateur et json - contains_user_messages = False - # Cas du fichier de log utilisateur - if file_name == kht.ERR_TXT: - contains_user_messages = True - # Identification des lignes de message - ref_file_lines = strip_user_message_lines(ref_file_lines) - test_file_lines = 
strip_user_message_lines(test_file_lines) - # Cas des fichiers json - elif is_file_with_json_extension(file_name): - contains_user_messages = True - # Pretraitement des lignes de message pour les mettre dans le meme format - # que pour les fichier d'erreur - ref_file_lines = strip_user_message_lines_in_json_file( - ref_file_lines - ) - test_file_lines = strip_user_message_lines_in_json_file( - test_file_lines - ) - - # Filtrage des messages specifiques au sequentiel (100th...) - if contains_user_messages: - ref_file_lines = filter_sequential_messages_lines( - ref_file_lines, log_file=log_file - ) - test_file_lines = filter_sequential_messages_lines( - test_file_lines, log_file=log_file - ) - - # Comparaison des fichiers pre-traites - errors, warnings, user_message_warnings = check_file_lines( - ref_file_path, - test_file_path, - ref_file_lines, - test_file_lines, - log_file=log_file, - ) - error_number += errors - warning_number += warnings - user_message_warning_number += user_message_warnings - - # Memorisation des statistiques par extension - if errors > 0: - erroneous_file_names.append(file_name) - error_number_per_file[file_name] = errors - erroneous_ref_file_lines[file_name] = ref_file_lines - erroneous_test_file_lines[file_name] = test_file_lines - if file_name == kht.ERR_TXT: - error_number_in_err_txt += errors - else: - _, file_extension = os.path.splitext(file_name) - error_number_per_extension[file_extension] = ( - error_number_per_extension.get(file_extension, 0) + errors - ) - - # Message synthetique de recuperation des warnng sur les message utilisateur si necessaire - if user_message_warning_number > 0: - recovery_message = utils.append_message( - recovery_message, "Recovery from varying patterns in user messages" - ) - - # Recherche des erreurs fatales, avec tentative de recuperation - # On accepte les erreurs fatales que si on ales meme en test et reference, - # et uniquement dans le cas du pattern particulier du "Batch mode failure" qui est du - # 
a des scenario n'ayant pas pu s'excuter entierement pour des raison de portabilite - fatal_error_recovery = True - for file_name in test_file_names: - # Cas d'une erreur fatale - if file_name in kht.SPECIAL_ERROR_FILES: - special_error_file_error_numbers[file_name] = ( - special_error_file_error_numbers[file_name] + 1 - ) - error_number += 1 - special_error = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name].lower() - utils.write_message( - "\n" + special_error + " : found file " + file_name, - log_file=log_file, - ) - - # La tentative de recuperation des erreurs fatales echoue si on ne respecte - # pas toutes les conditions necessaires - if file_name not in [kht.STDERR_ERROR_LOG, kht.RETURN_CODE_ERROR_LOG]: - fatal_error_recovery = False - else: - # Les fichiers doivent etre les memes - if ( - file_name in erroneous_file_names - or file_name not in ref_file_names - ): - fatal_error_recovery = False - # Test que le fichier est reduit au pattern accepte - if not fatal_error_recovery: - # Lecture des lignes du fichier - test_file_path = os.path.join(results_dir, file_name) - test_file_lines = utils.read_file_lines( - test_file_path, log_file=log_file - ) - # Pattern dans le cas de sdterr - fatal_error_pattern = ( - "fatal error : Command file : Batch mode failure" - ) - if file_name == kht.STDERR_ERROR_LOG: - if ( - len(test_file_lines) == 0 - or test_file_lines[0].strip() != fatal_error_pattern - ): - fatal_error_recovery = False - # Pattern dans le cas du code retour - return_code_error_pattern = "Wrong return code: 1 (should be 0)" - if file_name == kht.RETURN_CODE_ERROR_LOG: - if ( - len(test_file_lines) == 0 - or test_file_lines[0].strip() - != return_code_error_pattern - ): - fatal_error_recovery = False - # Message de recuperation si necessaire - if special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] > 0: - # Cas de la recuperation - if fatal_error_recovery: - error_number -= special_error_file_error_numbers[ - kht.RETURN_CODE_ERROR_LOG - ] - error_number 
-= special_error_file_error_numbers[kht.STDERR_ERROR_LOG] - special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] = 0 - special_error_file_error_numbers[kht.STDERR_ERROR_LOG] = 0 - utils.write_message( - "\nRecovery from fatal errors caused solely by a 'Batch mode failure' in another platform", - log_file=log_file, - ) - recovery_message = utils.append_message( - recovery_message, "Recovery of type 'Batch mode failure'" - ) - - # Ecriture des premieres lignes des fichiers d'erreur fatales ou de timeout si necessaire - for file_name in test_file_names: - if ( - file_name in kht.SPECIAL_ERROR_FILES - and special_error_file_error_numbers[file_name] > 0 - ): - # Lecture des lignes du fichier - test_file_path = os.path.join(results_dir, file_name) - test_file_lines = utils.read_file_lines( - test_file_path, log_file=log_file - ) - utils.write_message( - "\nspecial error file " + test_file_path, log_file=log_file - ) - max_print_lines = 10 - for i, line in enumerate(test_file_lines): - if i < max_print_lines: - utils.write_message("\t" + line.rstrip(), log_file=log_file) - else: - utils.write_message("\t...", log_file=log_file) - break - - # Il y a plusieurs tentatives de recuperation des erreurs pour des jeux de test ou des variation normales - # sont possibles, comme par exemple des difference sur la caucl de l'auc en cas de manque de ressource - # Ces tentatives sont implementees de facon pragmatique (code minimaliste, facile a developper et faire evoluer) - # pour automatiser l'analyse manuelle des resultats qui ete effectuee auparavant - # On ne cherche pas a ere resilient a tous les cas possibles, ni a gerer la complexite des types de recuperation - # pouvant se combiner. 
Ces methodes de recuperation ne servent parfois que pour un seul jeu de donnees, - # et il ne faut pas hesiter si besoin a simplifier certains jeux de test pour eviter qu'ils combinent - # plusieurs problemes de recuperation - - # Tentative de recuperation des erreurs si la seule difference provient du fichier de log de Khiops - # et est du a des warning en nombre variable en mode parallele, sans ecriture de rapport - if error_number > 0: - varying_warning_messages_in_err_txt_recovery = True - - # Les messages doivent n'apparaitre que dans le fichier de log - if varying_warning_messages_in_err_txt_recovery: - varying_warning_messages_in_err_txt_recovery = ( - error_number == error_number_in_err_txt - ) - - # Filtrage d'un certain type de warning pour recommencer la comaraison - if varying_warning_messages_in_err_txt_recovery: - # Acces aux lignes des fichier - ref_file_lines = erroneous_ref_file_lines.get(kht.ERR_TXT) - test_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) - - # Filtrage des lignes selon le motif en nombre variable - warning_pattern1 = "warning : Data table slice " - warning_pattern2 = " : Read data table slice interrupted by user" - filtered_ref_file_lines = [] - filtered_test_file_lines = [] - for line in ref_file_lines: - if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: - filtered_ref_file_lines.append(line) - for line in test_file_lines: - if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: - filtered_test_file_lines.append(line) - - # Comparaison a nouveau des fichiers, en mode non verbeux - errors, warnings, user_message_warnings = check_file_lines( - kht.ERR_TXT, - kht.ERR_TXT, - filtered_ref_file_lines, - filtered_test_file_lines, - ) - - # Recuperation possible si plus d'erreur apres filtrage - varying_warning_messages_in_err_txt_recovery = errors == 0 - - # Recuperation effective des erreurs si possible - if varying_warning_messages_in_err_txt_recovery: - # Messages sur la recuperation - 
recovery_summary = ( - "Recovery from varying warning number in " + kht.ERR_TXT + " file only" - ) - recovery_message = utils.append_message(recovery_message, recovery_summary) - utils.write_message("\n" + recovery_summary + ":", log_file=log_file) - utils.write_message( - "\tall errors come from the warning in " - + kht.ERR_TXT - + " file only, du to varying number of active process number", - log_file=log_file, - ) - utils.write_message( - "\t" + str(error_number) + " errors converted to warnings", - log_file=log_file, - ) - # On transforme les erreur en warning - warning_number += error_number - error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensions concernees - error_number_in_err_txt = 0 - - # Tentative de recuperation des erreurs si la seule difference est une difference d'ordre - # des messages utilisateur (error ou warning) - if error_number > 0: - unsorted_user_messages_recovery = True - - # Verification de la repartition des nombres d'erreur - if unsorted_user_messages_recovery: - # Recherche du nombre d'erreur dans les rapport json - error_number_in_json_report_files = error_number_per_extension.get( - ".khj", 0 - ) + error_number_per_extension.get(".khcj", 0) - - # On test si le nombre total d'erreur se rapartit entre le fichier de log utilisateur - # et les rapports json - unsorted_user_messages_recovery = ( - error_number_in_err_txt == error_number_in_json_report_files - and error_number_in_err_txt + error_number_in_json_report_files - == error_number - ) - - # Analyse specifique de la sous partie des fichiers correspondant aux messages utilisateur, - # qui ont ete marque en stripant les lignes correspondantes - if unsorted_user_messages_recovery: - - def filter_record_index_from_lines(lines): - """Filtrage avance des lignes en supprimant le debut de ligne jusqu'a l'index de record""" - filtered_lines = [] - record_index_pattern = [ - "warning : Data table ", - " : Record ", - " : Field ", - ] - for input_line in lines: 
- pos1 = utils.find_pattern_in_line(input_line, record_index_pattern) - if pos1 >= 0: - input_line = input_line[ - input_line.find(record_index_pattern[-1]) : - ] - filtered_lines.append(input_line) - return filtered_lines - - # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs - user_message_error_number = 0 - recovered_error_number = 0 - recovered_warning_number = 0 - for file_name in erroneous_file_names: - # Recherche des lignes des fichiers erronnes - test_file_lines = erroneous_test_file_lines.get(file_name) - if test_file_lines is not None: - ref_file_lines = erroneous_ref_file_lines.get(file_name) - assert ref_file_lines is not None - # Extraction des lignes stripees, qui correspond aux messages utilisateurs - test_file_lines = extract_striped_lines(test_file_lines) - ref_file_lines = extract_striped_lines(ref_file_lines) - # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur - # La comparaison se fait de facon muette, sans passer par le fichier de log - errors, warnings, user_message_warnings = check_file_lines( - file_name, - file_name, - ref_file_lines, - test_file_lines, - ) - user_message_error_number += errors - # Comparaison filtree les messages utilisateurs jusqu'aux index des records, - # qui peuvent varier d'une execution a l'autre, puis les avoir trier - test_file_lines = filter_record_index_from_lines(test_file_lines) - ref_file_lines = filter_record_index_from_lines(ref_file_lines) - test_file_lines.sort() - ref_file_lines.sort() - errors, warnings, user_message_warnings = check_file_lines( - file_name, - file_name, - ref_file_lines, - test_file_lines, - ) - recovered_error_number += errors - recovered_warning_number += warnings - - # Il faut que les erreurs ne proviennent que des messages utilisateurs - if unsorted_user_messages_recovery: - unsorted_user_messages_recovery = ( - user_message_error_number == error_number - ) - # Il faut qu'il n'y ai plus d'erreur apres tri des 
message utilisateurs - if unsorted_user_messages_recovery: - unsorted_user_messages_recovery = recovered_error_number == 0 - - # Recuperation effective des erreurs si possible - if unsorted_user_messages_recovery: - # Messages sur la recuperation - recovery_summary = "Recovery from unsorted user messages" - recovery_message = utils.append_message(recovery_message, recovery_summary) - utils.write_message("\n" + recovery_summary + ":", log_file=log_file) - utils.write_message( - "\tall errors come from the users messages in " - + kht.ERR_TXT - + " and in json reports, with a different order and possibly different record indexes", - log_file=log_file, - ) - utils.write_message( - "\t" + str(error_number) + " errors converted to warnings", - log_file=log_file, - ) - # On transforme les erreur en warning - warning_number += error_number - error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensions concernees - error_number_per_extension[".khj"] = 0 - error_number_per_extension[".khcj"] = 0 - error_number_in_err_txt = 0 - - # Tentative de recuperation des erreurs si la seule difference provient de la limite des ressources - # qui ne permet pas de calcul la courbe de ROC de facon exacte - if error_number > 0: - roc_curve_recovery = True - - # On verifie d'abord qu'il y a un warning correspondant dans le log utilisateur - if roc_curve_recovery: - # On doit potentiellement relire ce fichier, car ce type de message correspond - # a un motif USER qui ne genere pas d'erreur - err_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) - if err_file_lines is None: - err_file_path = os.path.join(results_dir, kht.ERR_TXT) - err_file_lines = utils.read_file_lines(err_file_path) - if err_file_lines is None: - roc_curve_recovery = False - else: - searched_warning = ( - "warning : Evaluation Selective Naive Bayes : " - + "Not enough memory to compute the exact AUC:" - + " estimation made on a sub-sample of size" - ) - roc_curve_recovery = ( - 
utils.find_pattern_in_lines(err_file_lines, [searched_warning]) >= 0 - ) - - # Comptage des erreurs pour les fichier d'evaluation au format xls - error_number_in_json_report_files = 0 - if roc_curve_recovery: - error_number_in_evaluation_xls = 0 - for file_name in erroneous_file_names: - _, file_extension = os.path.splitext(file_name) - if file_extension == ".xls" and "EvaluationReport" in file_name: - error_number_in_evaluation_xls += error_number_per_file.get( - file_name - ) - # On teste si les nombre d'erreurs se rappartis dans le fichier de log utilisateur, - # les rapports json et les fichiers d'evalauation au format xls - error_number_in_json_report_files = error_number_per_extension.get( - ".khj", 0 - ) - roc_curve_recovery = ( - error_number_in_err_txt - + error_number_in_json_report_files - + error_number_in_evaluation_xls - == error_number - ) - - # Analyse specifique des rapports json en excluant la partie lie a la courbe de ROC - roc_curve_error_number = 0 - roc_curve_warning_number = 0 - if roc_curve_recovery: - for file_name in erroneous_file_names: - _, file_extension = os.path.splitext(file_name) - if file_extension == ".khj": - # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs - test_file_lines = erroneous_test_file_lines.get(file_name) - ref_file_lines = erroneous_ref_file_lines.get(file_name) - assert test_file_lines is not None - assert ref_file_lines is not None - # Extraction des champs qui correspondent au calcul de l'AUC et des courbes de ROC - for key in ["auc", "values"]: - # Selection d'un champ selon sa valeur - selected_test_file_lines = ( - extract_key_matching_lines_in_json_file( - test_file_lines, key - ) - ) - selected_ref_file_lines = ( - extract_key_matching_lines_in_json_file(ref_file_lines, key) - ) - # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur - # La comparaison se fait de facon muette, sans passer par le ficheir de log - errors, warnings, 
user_message_warnings = check_file_lines( - file_name, - file_name, - selected_test_file_lines, - selected_ref_file_lines, - ) - roc_curve_error_number += errors - roc_curve_warning_number += warnings - - # Le recouvrement est possible si le nombre d'erreurs trouves specifiquement pour le calcul - # de l'AUC et des courbes de ROC correspond au nombre d'eerur total - assert roc_curve_error_number <= error_number_in_json_report_files - roc_curve_recovery = ( - roc_curve_error_number == error_number_in_json_report_files - ) - - # Recuperation effective des erreurs si possible - if roc_curve_recovery: - # Messages sur la recuperation - recovery_summary = "Recovery from AUC rough estimate" - recovery_message = utils.append_message(recovery_message, recovery_summary) - utils.write_message("\n" + recovery_summary + ":", log_file=log_file) - utils.write_message( - "\tall errors in json report file come from AUC rough estimate", - log_file=log_file, - ) - utils.write_message( - "\t" - + str(roc_curve_error_number) - + " errors in json report files converted to warnings", - log_file=log_file, - ) - utils.write_message( - "\t" - + str(error_number - roc_curve_error_number) - + " errors in evaluation xls files ignored and converted to warnings", - log_file=log_file, - ) - # On transforme les erreur en warning - warning_number += error_number - error_number = 0 - # On reinitialise egalement les stats d'erreur pour les extensions concernees - error_number_per_extension[".khj"] = 0 - error_number_per_extension[".xls"] = 0 - - # Tentative de recuperation des erreurs dans le cas tres particulier des caracteres accentues sous Windows, - # ou on observe un comportement local a la machine de developement sous Windows different de celui - # observe sur la machine Windows cloud, pourl aquelle certains fichiers sources avec caracteres - # accentues n'ont pas pu etre dezippes correctement et conduisent a des erreurs de lecture - # Dans ce cas uniquement, on tente de se comparer a une 
version linux de reference, pour laquelle - # on a le meme probleme et on observe le meme comportement - # Pas de recuperation d'erreur avancee si un contexte est force - if error_number > 0 and forced_context is None: - zip_encoding_recovery = True - - # On verifie d'abord que les conditions sont reunies - linux_context = None - if zip_encoding_recovery: - # On doit etre sous Windows - zip_encoding_recovery = results.get_context_platform_type() == "Windows" - - # Le fichier err.txt doit comporter une erreur de lecture - if zip_encoding_recovery: - read_error_pattern = ["error : File ./", " : Unable to open file ("] - err_file_path = os.path.join(results_dir, kht.ERR_TXT) - err_file_lines = utils.read_file_lines(err_file_path) - zip_encoding_recovery = err_file_lines is not None - # On doit trouver le pattern d'erreur - if zip_encoding_recovery: - line_index = utils.find_pattern_in_lines( - err_file_lines, read_error_pattern - ) - zip_encoding_recovery = line_index >= 0 - # La ligne concernee doit avoir un probleme de caracrete accentue - if zip_encoding_recovery: - erronneous_line = err_file_lines[line_index] - ascii_erronneous_line = erronneous_line.encode( - "ascii", "ignore" - ).decode("ascii") - zip_encoding_recovery = ascii_erronneous_line != erronneous_line - - # Il doit y avoir un des resultats de references specifiques pour Linux - if zip_encoding_recovery: - assert forced_context is None - windows_results_ref_dir, _ = results.get_results_ref_dir(test_dir) - linux_context = [results.get_context_computing_type(), "Linux"] - linux_results_ref_dir, _ = results.get_results_ref_dir( - test_dir, forced_context=linux_context - ) - zip_encoding_recovery = windows_results_ref_dir != linux_results_ref_dir - - # Comparaison des resultats de test avec ceux de reference sous linux - if zip_encoding_recovery: - results_ref_dir = os.path.join(test_dir, linux_results_ref_dir) - assert linux_context is not None - # Comparaison "pragmatique" entre les fichiers des 
repertoires de test et de reference - # en forcant le contexte, sans tentative de recuperation d'erreur avancee - zip_encoding_recovery = check_results( - test_dir, forced_context=linux_context - ) - - # Recuperation effective des erreurs si possible - if zip_encoding_recovery: - # Messages sur la recuperation - recovery_summary = ( - "Recovery from poor handling of accented file names by zip" - ) - recovery_message = utils.append_message(recovery_message, recovery_summary) - utils.write_message("\n" + recovery_summary + ":", log_file=log_file) - utils.write_message( - "\tcomparison for Windows test results is performed using Linux reference results", - log_file=log_file, - ) - utils.write_message( - "\t" + str(error_number) + " errors converted to warnings", - log_file=log_file, - ) - # On transforme les erreur en warning - warning_number += error_number - error_number = 0 - # On reinitialise egalement les stats d'erreur - for extension in error_number_per_extension: - error_number_per_extension[extension] = 0 - for file_name in kht.SPECIAL_ERROR_FILES: - special_error_file_error_numbers[file_name] = 0 - - # Message dedies aux fichiers speciaux - special_error_file_message = "" - for file_name in kht.SPECIAL_ERROR_FILES: - if special_error_file_error_numbers[file_name] > 0: - special_error_file_message = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name] - break - - # Ecriture d'un resume synthetique - utils.write_message("\n" + SUMMARY_TITLE, log_file=log_file) - utils.write_message( - str(warning_number) + " " + SUMMARY_WARNING_KEY, log_file=log_file - ) - utils.write_message(str(error_number) + " " + SUMMARY_ERROR_KEY, log_file=log_file) - if special_error_file_message != "": - utils.write_message(special_error_file_message, log_file=log_file) - if error_number > 0: - # Tri des extensions - file_extensions = [] - for file_extension in error_number_per_extension: - file_extensions.append(file_extension) - file_extensions.sort() - # Message specifique si erreurs dans 
un seul type de fichier - if error_number_in_err_txt > 0: - extension_message = utils.append_message(extension_message, kht.ERR_TXT) - if error_number_in_err_txt == error_number: - specific_message = utils.append_message( - specific_message, "errors only in " + kht.ERR_TXT - ) - if len(file_extensions) > 0: - for file_extension in file_extensions: - extension_message = utils.append_message( - extension_message, file_extension - ) - if error_number_per_extension[file_extension] == error_number: - specific_message = utils.append_message( - specific_message, "errors only in " + file_extension + " files" - ) - # Ecriture des messages additionnels - if extension_message != "": - utils.write_message( - SUMMARY_FILE_TYPES_KEY + extension_message, log_file=log_file - ) - if specific_message != "": - utils.write_message(SUMMARY_NOTE_KEY + specific_message, log_file=log_file) - - # Ecriture d'un message additionnel lie a la portabilite - portability_message = utils.append_message(portability_message, recovery_message) - if portability_message != "": - utils.write_message( - SUMMARY_PORTABILITY_KEY + portability_message, log_file=log_file - ) - - # Affichage d'un message de fin sur la console si le contexte n'est pas force - if forced_context is None: - final_message = "--Comparison done : " - final_message += str(compared_files_number) + " files(s) compared, " - final_message += str(error_number) + " error(s), " - final_message += str(warning_number) + " warning(s)" - if special_error_file_message != "": - final_message += ", " + special_error_file_message - if recovery_message != "": - final_message += ", Recovery from errors" - print(final_message) - print(" log file: " + log_file_path + "\n") - return error_number == 0 - - -def is_file_with_json_extension(file_path): - """Test si le path d'un fichier correspond a un fichier json""" - # Recherche du fichier compare et de son extension - file_name = os.path.basename(file_path) - _, file_extension = 
os.path.splitext(file_name) - - # Extension json de base - json_file_extensions = [".json", ".khj", ".khvj", ".khcj", ".kdicj"] - # On rajoute les extension en les suffisant par "bad" pour permettre - # de gerer des tests de fichier corrects avec une extension erronnee - for extension in json_file_extensions.copy(): - json_file_extensions.append(extension + "bad") - is_json_file = file_extension in json_file_extensions - return is_json_file - - -def is_line_striped(line): - """Test si une ligne est stripee, sans caractere fin de ligne a la fin""" - return len(line) == 0 or line[-1] != "\n" - - -def strip_user_message_lines(lines): - """Renvoie la liste des lignes en ayant stripe toutes les lignes correspondant a - message utilisateur ('error' ou 'warning') - Permet ensuite de reperer simplement ces lignes dans une liste - """ - result_lines = [] - for line in lines: - if line.find("warning : ") == 0 or line.find("error : ") == 0: - line = line.strip() - result_lines.append(line) - return result_lines - - -def strip_user_message_lines_in_json_file(lines): - """Analyse d'un fichier json pour identifiant les sections 'messages' - contenant les messages utilisateur ('error' ou 'warning') - Les ligne correspondantes sont mise sous le meme format que dans le fichier - de log d'erreur en supprimant les caracteres '"' de debut et de fin - Ces lignes sont egalement stripees pour pouvoir les reperer simplement dans la - la liste de ligne retournee en sortie - - Remarque: on se base sur le formatge json en sortie des outils Khiops, - qui permet de faire une analyse simple ligne a ligne et de garder les numeros - de lignes corrects dans les message d'erreur, meme en cas de format json erronne - Une alternative par chargement direct d'un fichier json ne permettrait pas - ce type d'analyse et de diagnostic - """ - - def clean_message(message): - """Nettoyage d'une ligne de message, entre '"' et potentiellement suivi d'une ',' - Cela ne gere pas tous les cas d'encodage json, mais 
cela est suffisant la plupart du temps - """ - cleaned_message = message.strip() - # Cas d'un milieur de section, avec ',' en fin de ligne - if cleaned_message[-1] == ",": - cleaned_message = cleaned_message[1:-2] - # Cas d'une fin de section - else: - cleaned_message = cleaned_message[1:-1] - return cleaned_message - - # Recherche des lignes du fichier dans les sections "messages" - in_message_section = False - result_lines = [] - # Pretraitement des lignes - for line in lines: - # Cas ou est dans la section des message - if in_message_section: - # Detection de la fin de section - in_message_section = line.strip() != "]" - # Nettoyage des lignes dans la section message - if in_message_section: - line = clean_message(line) - # Cas hors de la section des message - else: - # Detection du debut de section - in_message_section = line.strip() == '"messages": [' - result_lines.append(line) - return result_lines - - -def discard_key_matching_lines_in_json_file(lines, pattern): - """Renvoie la sous-liste des lignes ne correspondant pas a la cle en parametre""" - result_lines = [] - searched_full_pattern = '"' + pattern + '": ' - for line in lines: - if line.strip().find(searched_full_pattern) != 0: - result_lines.append(line) - return result_lines - - -def extract_key_matching_lines_in_json_file(lines, pattern): - """Renvoie la sous-liste des lignes correspondant a la cle en parametre""" - result_lines = [] - searched_full_pattern = '"' + pattern + '": ' - for line in lines: - if line.strip().find(searched_full_pattern) == 0: - result_lines.append(line) - return result_lines - - -def extract_striped_lines(lines): - """Retourne la sous_liste des lignes stripees de la liste en entree""" - striped_lines = [] - for line in lines: - if is_line_striped(line): - striped_lines.append(line) - return striped_lines - - -def filter_sequential_messages_lines(lines, log_file=None): - """Filtrage des errors et warning sequentiel d'un ensemble de lignes - - En sequentiel, de nouveaux 
messages de type 100th ou ... - sont emis, alors qu'il sont absents en parallele - En les filtrant, on rend les versions sequentielle et parallele comparables - Retourne les ligne filtrees, avec un message dans le log sur le nombre de lignes filtrees - """ - - def is_specific_line_pair_sequential(line1, line2): - """Test si une paire de lignes correspond a un pattern de message sequentiel - Premiere ligne avec 100th, 1000th error ou warning - Seconde ligne avec '...' - """ - message_type = "" - if line1.find("warning : ") == 0: - message_type = "warning" - elif line1.find("error : ") == 0: - message_type = "error" - is_specific = message_type != "" - # La premiere ligne doit se terminer par un pattern de type '(100th warning)' - if is_specific: - line1 = line1.strip() - expected_end_line1 = "00th " + message_type + ")" - is_specific = ( - line1[len(line1) - len(expected_end_line1) :] == expected_end_line1 - ) - # La seconde ligne doit se terminer par ' : ...' - if is_specific: - is_specific = line2.find(message_type) == 0 - if is_specific: - line2 = line2.strip() - expected_end_line2 = " : ..." 
- is_specific = ( - line2[len(line2) - len(expected_end_line2) :] == expected_end_line2 - ) - return is_specific - - result_lines = [] - filtered_line_number = 0 - # Filtrage des lignes - i = 0 - line_number = len(lines) - while i < line_number: - line = lines[i] - # On ne traite pas la derniere ligne, qui n'a pas de ligne suivante - if i == line_number - 1: - result_lines.append(line) - else: - next_line = lines[i + 1] - # On saute deux lignes si elles sont specifique a des message en sequentiel - if is_specific_line_pair_sequential(line, next_line): - i += 1 - filtered_line_number += 2 - else: - result_lines.append(line) - i += 1 - # Message si lignes filtrees - if filtered_line_number > 0: - utils.write_message( - "Specific sequential messages (100th...): " - + str(filtered_line_number) - + " lines filtered", - log_file=log_file, - ) - return result_lines - - -""" Liste de motifs pour lesquels ont admet une variation normale s'il font parti de la comparaison - dans une paire de lignes. 
Dans ce cas, on ignore la comparaison -""" -RESILIENCE_USER_MESSAGE_PATTERNS = [ - [ - "system resources are not sufficient to run the task (need ", - " of additional memory)", - ], - [ - "error : ", - "Database basic stats ", - "Too much memory necessary to store the values of the target variable ", - " (more than ", - ], - [ - "warning : Evaluation Selective Naive Bayes : Not enough memory to compute the exact AUC: " - + "estimation made on a sub-sample of size " - ], - [ - "warning : Database ", - ": Record ", - " : Single instance ", - "uses too much memory (more than ", - " after reading ", - " secondary records ", - ], - ["error : ", " : Not enough memory "], -] - - -def check_file_lines( - ref_file_path: str, - test_file_path: str, - ref_file_lines, - test_file_lines, - log_file=None, -): - """ - Comparaison d'un fichier de test et d'un fichier de reference - Parametres: - - ref_file_path: chemin du fichier de reference - - test_file_path: chemin du fichier de test - - ref_file_lines: liste des lignes du fichier de reference - - test_file_lines: liste des lignes du fichier de test - - log file: fichier de log ouvert dans le quel des messages sont ecrits (seulement si log_file est specifie) - - Retourne - - errors: nombre d'erreurs - - warnings: nombre de warnings - - user_message_warnings: nombre de warnings lie a une tolerance sur la variation des messages utilisateurs - (ex: "too much memory") - - Les noms des fichiers en parametre permettent de specialiser les comparaisons selon le type de fichier - Les listes de lignes en entree permettent d'eviter de relire un fichier dont on connait le nom - et dont on a deja lu les lignes. - Cela permet par exemple de reutiliser les methodes de comparaison apres avoir filtre le fichier - de sous-parties que l'on ne souhaite pas comparer. 
- - Compare les fichiers ligne par ligne, champ par champ (separateur '\t'), et token par token - dans le cas des fichiers json ou dictionnaire - On a avec des tolerances selon le type de fichier. - Pour les valeurs numeriques, une difference relative de 0.00001 est toleree - - ecrit les difference dans le fichier log_file et affiche le nb d'erreur dans le terminal - - warning : 2 champs contiennent des valeurs numeriques avec une difference relative toleree - - error : les champs sont differents - """ - - def filter_time(value): - # Suppression d'un pattern de time d'une valeur - pos_start_time = value.find(" time:") - if pos_start_time >= 0: - begin_value = value[:pos_start_time] - end_value = value[pos_start_time + len(" time:") :] - end_value = end_value.strip() - pos_end_time = end_value.find(" ") - if pos_end_time >= 0: - end_value = end_value[pos_end_time:] - else: - end_value = "" - filtered_value = begin_value + " time: ..." + filter_time(end_value) - else: - filtered_value = value - return filtered_value - - def filter_khiops_temp_dir(value): - # Nettoyage de la partie temp directory d'une valeur - pos_khiops_temp_dir = value.find("~Khiops") - if pos_khiops_temp_dir >= 0: - # Recherche du debut du path du fichier - begin_pos = pos_khiops_temp_dir - while begin_pos > 0 and value[begin_pos] != " ": - begin_pos -= 1 - # Recherche de la fin du repertoire temporaire - end_pos = pos_khiops_temp_dir - while ( - end_pos < len(value) - and value[end_pos] != "/" - and value[end_pos] != "\\" - ): - end_pos += 1 - while end_pos < len(value) and ( - value[end_pos] == "/" or value[end_pos] == "\\" - ): - end_pos += 1 - # Remplacement du nom du repertoire par un nom "logique" - begin_value = value[0:begin_pos] - end_value = value[end_pos : len(value)] - # Recherche du nom de fichier en debut de la end_value qui suit le nom du repertoire temporaire - filtered_filename = "" - end_filename_pos = end_value.find(" ") - if end_filename_pos != -1: - filename = 
end_value[0:end_filename_pos] - end_value = end_value[end_filename_pos:] - else: - filename = end_value - end_value = "" - # Filtrage de l'eventuel nom de fichier en remplacant les chiffres par le pattern XXX - # pour se rendre independant des eventuels index de fichiers temporaires - pos = 0 - while pos < len(filename): - c = filename[pos] - if c != "_" and not c.isdigit(): - filtered_filename += c - else: - filtered_filename += "XXX" - while pos < len(filename): - c = filename[pos] - if c != "_" and not c.isdigit(): - filtered_filename += c - break - pos += 1 - pos += 1 - filtered_value = ( - begin_value + " KHIOPS_TMP_DIR/" + filtered_filename + end_value - ) - else: - filtered_value = value - return filtered_value - - # Verifications - assert ref_file_path != "", "Missing ref file path" - assert test_file_path != "", "Missing test file path" - assert ref_file_lines is not None, "Missing ref file lines" - assert test_file_lines is not None, "Missing test file lines" - - # Recherche du fichier compare et de son extension - file_name = os.path.basename(ref_file_path) - assert file_name == os.path.basename(test_file_path) - _, file_extension = os.path.splitext(file_name) - - # test si fichier de temps - is_time_file = file_name == kht.TIME_LOG - - # test si fichier histogramme - is_histogram_file = "histogram" in file_name and file_extension == ".log" - - # test si fichier d'erreur - is_error_file = file_name == kht.ERR_TXT - - # test si fichier de benchmark - is_benchmark_file = file_name == "benchmark.xls" - - # Test si fichier json - is_json_file = is_file_with_json_extension(file_name) - - # initialisation des nombres d'erreurs et de warning - errors = 0 - warnings = 0 - numerical_warnings = 0 # Lie a une tolerance dee difference de valeur numerique - user_message_warnings = ( - 0 # Lie a un pattern de message avec tolerance (ex: "Not enough memory") - ) - - # Pas de controle si fichier de temps - if is_time_file: - utils.write_message("OK", log_file=log_file) 
- return errors, warnings, user_message_warnings - - # Comparaison des nombres de lignes - file_ref_line_number = len(ref_file_lines) - file_test_line_number = len(test_file_lines) - if file_test_line_number != file_ref_line_number: - utils.write_message( - "test file has " - + str(file_test_line_number) - + " lines and reference file has " - + str(file_ref_line_number) - + " lines", - log_file=log_file, - ) - errors = errors + 1 - - # comparaison ligne a ligne - max_threshold = 0 - max_print_error = 10 - max_field_length = 100 - skip_benchmark_lines = False - line_number = min(file_ref_line_number, file_test_line_number) - for index in range(line_number): - line = index + 1 - line_ref = ref_file_lines[index].rstrip() - line_test = test_file_lines[index].rstrip() - - # Cas special des fichiers de benchmark: - # on saute les blocs de ligne dont le role est le reporting de temps de calcul - # ("Time" dans le premier champ d'entete) - if is_benchmark_file and line_ref.find("Time") != -1: - skip_benchmark_lines = True - continue - if is_benchmark_file and skip_benchmark_lines: - # fin de bloc si ligne vide - if line_ref.find("\t") == -1: - skip_benchmark_lines = False - if skip_benchmark_lines: - continue - - # Ok si lignes egales - if line_ref == line_test: - continue - - # Cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") - if ( - is_error_file - and line_ref.find(" time: ") != -1 - and line_test.find(" time: ") != -1 - ): - line_ref = filter_time(line_ref) - line_test = filter_time(line_test) - - # Cas special du fichier d'erreur: on tronque les lignes de stats sur les records des tables - if is_error_file: - record_stats_pattern = [" Table ", " Records: "] - if ( - utils.find_pattern_in_line(line_ref, record_stats_pattern) == 0 - and utils.find_pattern_in_line(line_test, record_stats_pattern) == 0 - ): - line_ref = line_ref[: line_ref.find(record_stats_pattern[-1])] - line_test = line_test[: 
line_test.find(record_stats_pattern[-1])] - - # Cas special du fichier d'erreur: - # on saute les lignes qui font du reporting de temps de calcul ("interrupted ") - if ( - is_error_file - and line_ref.lower().find(" interrupted ") != -1 - and line_test.lower().find(" interrupted ") != -1 - ): - continue - - # Cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensitive - if is_error_file: - if line_ref.find("(Operation canceled)") != -1: - line_ref = line_ref.replace( - "(Operation canceled)", "(operation canceled)" - ) - if line_test.find("(Operation canceled)") != -1: - line_test = line_test.replace( - "(Operation canceled)", "(operation canceled)" - ) - - # Cas special du fichier d'erreur en coclustering: - # on saute les lignes d'ecriture de rapport intermediaire qui different par le temps - # ("Write intermediate coclustering report") - if ( - is_error_file - and line_ref.find("Write intermediate coclustering report") != -1 - and line_test.find("Write intermediate coclustering report") != -1 - ): - continue - - # Cas special du fichier d'histogramme: - # on tronque les lignes qui font du reporting de temps de calcul (" time\t") - if ( - is_histogram_file - and line_ref.find("time") != -1 - and line_test.find("time") != -1 - ): - line_ref = line_ref[: line_ref.find("time")] - line_test = line_test[: line_test.find("time")] - # Cas special du fichier d'histogramme: - # on ignore les ligne avec le numero de version - if ( - is_histogram_file - and line_ref.find("Version") != -1 - and line_test.find("Version") != -1 - ): - continue - - # Cas special du caractere # en tete de premiere ligne de fichier - # pour l'identifiant de version d'application (ex: #Khiops 10.2.0) - tool_version_pattern = ["#", " "] - if ( - line == 1 - and utils.find_pattern_in_line(line_ref, tool_version_pattern) == 0 - and utils.find_pattern_in_line(line_test, tool_version_pattern) == 0 - ): - continue - - # Cas special du champ version des fichiers 
json (identifiant de version d'application) - if ( - is_json_file - and line_ref.find('"version": ') >= 0 - and line_test.find('"version": ') >= 0 - ): - continue - - # Traitement des patterns toleres pour la comparaison - if is_error_file or is_json_file: - resilience_found = False - for pattern in RESILIENCE_USER_MESSAGE_PATTERNS: - if ( - utils.find_pattern_in_line(line_ref, pattern) != -1 - and utils.find_pattern_in_line(line_test, pattern) != -1 - ): - # On renvoie un warning, en indiquant qu'il s'agit d'un warning de resilience - warnings += 1 - user_message_warnings += 1 - # Ecriture d'un warning - utils.write_message( - "warning : line " - + str(line) - + " " - + line_test.strip() - + " -> " - + line_ref.strip(), - log_file=log_file, - ) - resilience_found = True - break - if resilience_found: - continue - - # Sinon, on analyse les champs - line_fields_ref = line_ref.split("\t") - line_fields_test = line_test.split("\t") - - # comparaison des nombres de champs - field_number_ref = len(line_fields_ref) - field_number_test = len(line_fields_test) - if field_number_ref != field_number_test: - if errors < max_print_error: - utils.write_message( - "test file (line " - + str(line) - + ") has " - + str(field_number_test) - + " columns and reference file has " - + str(field_number_ref) - + " columns", - log_file=log_file, - ) - elif errors == max_print_error: - utils.write_message("...", log_file=log_file) - errors = errors + 1 - - # comparaison des champs - field_number_length = min(field_number_ref, field_number_test) - for i in range(field_number_length): - field_ref = line_fields_ref[i] - field_test = line_fields_test[i] - - # parcours des lignes champ par champs - # cas special du fichier d'erreur ou json: on tronque les chemins vers les repertoires temporaires de Khiops - if ( - (is_error_file or is_json_file) - and field_ref.find("~Khiops") != -1 - and field_test.find("~Khiops") != -1 - ): - field_ref = filter_khiops_temp_dir(field_ref) - field_test = 
filter_khiops_temp_dir(field_test) - - # cas general de comparaison de champs - [eval_res, threshold_res] = check_field(field_ref, field_test) - - # truncature des champs affiches dans les messages d'erreur - if len(field_test) > max_field_length: - field_test = field_test[0:max_field_length] + "..." - if len(field_ref) > max_field_length: - field_ref = field_ref[0:max_field_length] + "..." - # messages d'erreur - if eval_res == 0: - if errors < max_print_error or threshold_res > max_threshold: - utils.write_message( - "line " - + str(line) - + " field " - + str(i + 1) - + " " - + field_test - + " -> " - + field_ref, - log_file=log_file, - ) - elif errors == max_print_error: - utils.write_message("...", log_file=log_file) - errors += 1 - elif eval_res == 2: - warnings += 1 - if threshold_res > 0: - numerical_warnings += 1 - max_threshold = max(threshold_res, max_threshold) - if warnings > 0: - if numerical_warnings > 0: - utils.write_message( - str(numerical_warnings) + " warning(s) (epsilon difference)", - log_file=log_file, - ) - if user_message_warnings > 0: - utils.write_message( - str(user_message_warnings) - + " warning(s) (resilience to specific user message patterns)", - log_file=log_file, - ) - if errors == 0: - utils.write_message("OK", log_file=log_file) - if errors > 0: - message = str(errors) + " error(s)" - if max_threshold > 0: - message += " (max relative difference: " + str(max_threshold) + ")" - utils.write_message(message, log_file=log_file) - return errors, warnings, user_message_warnings - - -def split_field(field_value): - """Decoupage d'un champ (champ d'une ligne avec separateur tabulation) - en un ensemble de tokens elementaire pour le parsing d'un fichier json ou kdic - Permet ensuite de comparer chaque valeur de token, pour avoir une tolerance par rapport aux - mirco-variations des valeurs numeriques""" - # Pour gerer les double-quotes a l'interieur des strings, pour les format json et kdic - field_value = field_value.replace('\\"', "'") 
- field_value = field_value.replace('""', "'") - sub_fields = TOKEN_PARSER.findall(field_value) - return sub_fields - - -def is_time(val): - """Indique si une valeur est de type temps hh:mm:ss.ms""" - return TIME_PARSER.match(val.strip()) - - -def check_value(val1, val2): - """Comparaison de deux valeurs numeriques - Renvoie deux valeur: - - result: - - 1 si les valeurs sont identiques - - 2 si les la difference relative est toleree - - 0 si les valeurs sont differentes - - threshold: difference relative si result = 2 - """ - # Ok si valeurs egales - if val1 == val2: - return [1, 0] - # Sinon, tentative de comparaison numerique - threshold = float(0.00001) - try: - float1 = float(val1) - float2 = float(val2) - res = ( - 0.5 * abs(float1 - float2) / (abs(float1) / 2 + abs(float2) / 2 + threshold) - ) - if res <= threshold: - return [2, res] - return [0, res] - # Erreur si format non numerique et difference - except ValueError: - return [0, 0] - - -def check_field(field1, field2): - """ " Comparaison de deux champs - Pour les valeurs numeriques, une diffence relative de 0.00001 est toleree - Renvoie deux valeur: - - result: - - 1 si les champs sont identiques - - 2 si les la difference relative est toleree (warning) - - 0 si les champs sont differents (error) - - threshold: difference relative liee au cas erreur ou warning - """ - if field1 == field2: - return [1, 0] - - # si les deux champs sont des time, on renvoie OK pour ignorer la comparaison - if is_time(field1) and is_time(field2): - return [1, 0] - - # uniformisation entre windows et linux pour les chemins de fichier - # on va remplacer les \ par des / - string1 = field1.replace("\\", "/") - string2 = field2.replace("\\", "/") - # Tolerance temporaire pour le passage au format hdfs - # hdfs_value1 = field1.replace("./", "") - # hdfs_value1 = hdfs_value1.replace(".\\/..\\/", "") - # hdfs_value1 = hdfs_value1.replace("..\\/", "") - # hdfs_value1 = hdfs_value1.replace(".\\/", "") - # hdfs_value2 = 
field2.replace("./", "") - # hdfs_value2 = hdfs_value2.replace(".\\/..\\/", "") - # hdfs_value2 = hdfs_value2.replace("..\\/", "") - # hdfs_value2 = hdfs_value2.replace(".\\/", "") - # if hdfs_value1 == hdfs_value2: - # return [1, 0] - if string1 == string2: - return [1, 0] - - # sinon c'est peut etre un probleme d'arrondi - # on accepte les differences relatives faibles - if NUMERIC_PARSER.match(field1) and NUMERIC_PARSER.match(field2): - [eval_result, threshold_result] = check_value(field1, field2) - return [eval_result, threshold_result] - else: - # on arrive pas a le convertir en float, ce n'est pas un nombre - # on decoupe chaque champ sous la forme d'un ensemble de sous-chaines qui sont soit - # des libelles, soit des float - sub_fields1 = split_field(field1) - sub_fields2 = split_field(field2) - - # nombre de sous-chaines differentes: il y a erreur - if len(sub_fields1) != len(sub_fields2): - return [0, 0] - # comparaison pas a pas - else: - i = 0 - length = len(sub_fields1) - warnings = 0 - errors = 0 - max_warning_threshold = 0 - max_error_threshold = 0 - while i < length: - [eval_result, threshold_result] = check_value( - sub_fields1[i], sub_fields2[i] - ) - # Traitement des erreurs - if eval_result == 0: - errors += 1 - max_error_threshold = max(threshold_result, max_error_threshold) - # Traitement des warnings - if eval_result == 2: - warnings += 1 - max_warning_threshold = max(threshold_result, max_warning_threshold) - i = i + 1 - if errors > 0: - return [0, max_error_threshold] - elif warnings > 0: - return [2, max_warning_threshold] - else: - return [1, 0] - - -def initialize_parsers(): - """Initialisation de parsers sont compile une fois pour toutes - Retourne les parsers de token, de numeric et de time - """ - # Delimiters pour les fichiers json et kdic - delimiters = [ - "\\,", - "\\{", - "\\}", - "\\[", - "\\]", - "\\:", - "\\(", - "\\)", - "\\<", - "\\>", - "\\=", - ] - numeric_pattern = "-?[0-9]+\\.?[0-9]*(?:[Ee]-?[0-9]+)?" 
- string_pattern = ( - '"[^"]*"' # Sans les double-quotes dans les strings (dur a parser...) - ) - time_pattern = "\\d{1,2}:\\d{2}:\\d{2}\\.?\\d*" - other_tokens = "[\\w]+" - tokens = time_pattern + "|" + numeric_pattern + "|" + string_pattern - for delimiter in delimiters: - tokens += "|" + delimiter - tokens += "|" + other_tokens - token_parser = re.compile(tokens) - numeric_parser = re.compile(numeric_pattern) - time_parser = re.compile(time_pattern) - return token_parser, numeric_parser, time_parser - - -# Parsers en variables globales, compiles une seule fois au chargement du module -# - le parser de tokens permet d'analyser de facon detaillee le contenu d'un -# fichier json ou dictionnaire (.kdic) en le decomposant en une suite de tokens -# separateur, valeur numerique opu categorielle entre double-quotes. -# - le parser de numerique est specialise pour les valeurs numeriques au format scientifique -# - le parser de time est specialise pour le format time hh:mm:ss.ms -TOKEN_PARSER, NUMERIC_PARSER, TIME_PARSER = initialize_parsers() +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +import os.path +import re + +import _kht_constants as kht +import _kht_utils as utils +import _kht_results_management as results + +""" +Verification des resultats d'un repertoire de test terminal + +La comparaison est effectue entre les resultats de test, et les resultats de reference +correspondant au contexte en cours (plateforme, parallel ou sequuentiel...). 
+Elle se fait sur tous les fichiers du repertoire de facon hierarchique
+- nombre de fichiers de chaque repertoire
+- noms des fichiers
+- pour chaque fichier
+  - nombre de lignes
+  - contenu
+    - comparaison des lignes
+    - si necessaire, comparaison des champs des lignes, pour un separateur tabulation
+      - si necessaire, comparaison des tokens du champ,
+        dans le cas de la tokenisation d'un fichier json ou kdic
+
+La comparaison se fait en etant tolerant aux variations 'normales' selon le contexte d'execution
+- il peut y avoir des resultats de reference differents selon le contexte
+- on filtre prealablement certaines informations non presentes systematiquement
+  - copyright
+  - prefix de type '[0] ' lie au process, genere par mpiexec en parallele
+  - statistique sur la memoire en mode debug
+  ...
+- il y a une tolerance sur les valeurs numeriques, ce qui entraine alors des warnings et non des erreurs
+- ...
+
+En cas d'erreurs residuelles, plusieurs strategies de recouvrement des erreurs sont utilisees,
+a differents moments du processus de comparaison
+- tolerance sur echec de scenario, si cela correspond aux resultats de reference
+- tolerance aux noms de fichier utilisant des caracteres accentues systeme dependant
+- tolerance sur les messages d'erreur differents en parallele et en sequentiel
+- tolerance sur les messages d'erreur lies au manque de ressource
+...
+""" + +# Nom du fichier de comparaison +COMPARISON_LOG_FILE_NAME = kht.COMPARISON_RESULTS_LOG + +# Constantes de la section SUMMARY des fichiers de log des resultats de comparaison +SUMMARY_TITLE = "SUMMARY" +SUMMARY_WARNING_KEY = "warning(s)" +SUMMARY_ERROR_KEY = "error(s)" +SUMMARY_FILE_TYPES_KEY = "Problem file types: " +SUMMARY_NOTE_KEY = "Note: " +SUMMARY_PORTABILITY_KEY = "Portability: " + +# Constantes pour la gestion des fichiers speciaux, par priorite decroissante +SUMMARY_TIMEOUT_ERROR_KEY = "TIMEOUT ERROR" +SUMMARY_FATAL_ERROR_KEY = "FATAL ERROR" +SUMMARY_UNEXPECTED_OUTPUT_KEY = "UNEXPECTED OUTPUT" +SUMMARY_SPECIAL_FILE_KEYS = [ + SUMMARY_TIMEOUT_ERROR_KEY, + SUMMARY_FATAL_ERROR_KEY, + SUMMARY_UNEXPECTED_OUTPUT_KEY, +] + +# Association entre type de fichier special et cle de gestion dans le resume +SUMMARY_SPECIAL_FILE_KEYS_PER_FILE = { + kht.STDOUT_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, + kht.STDERR_ERROR_LOG: SUMMARY_UNEXPECTED_OUTPUT_KEY, + kht.PROCESS_TIMEOUT_ERROR_LOG: SUMMARY_TIMEOUT_ERROR_KEY, + kht.RETURN_CODE_ERROR_LOG: SUMMARY_FATAL_ERROR_KEY, +} +assert len(SUMMARY_SPECIAL_FILE_KEYS_PER_FILE) == len(kht.SPECIAL_ERROR_FILES) + +# Ensemble des cle pouvant se trouver dans le resume +ALL_SUMMARY_KEYS = [ + SUMMARY_WARNING_KEY, + SUMMARY_ERROR_KEY, + SUMMARY_FILE_TYPES_KEY, + SUMMARY_PORTABILITY_KEY, +] + SUMMARY_SPECIAL_FILE_KEYS +assert len(set(ALL_SUMMARY_KEYS)) == len(ALL_SUMMARY_KEYS), ( + "Summary keys " + str(ALL_SUMMARY_KEYS) + " must not contain duplicates" +) + + +def analyse_comparison_log(test_dir): + """ + Analyse du log de comparaison des resultats de test et de reference + present dans un repertoire de test + Renvoie: + - error_number + Le nombre d'erreurs deduit du resume + - warning_number + Le nombre de warnings deduit du resume + - summary_infos: + Un dictionnaire par avec une ligne de texte par cle de resume (ALL_SUMMARY_KEYS) + - files_infos: + Un dictionaire par nom de fichier contenant le resultat de la comparaison + pour 
ce fichier, sous la forme d'un texte potentiellement multi-lignes + Ce texte contient 'OK' uniquement si aucun problme n'est detecte + Il contient des lignes de texte, dont certain sont potentiellement prefixes par 'warning: ' + ou 'error : ' sinon + Si le log de comparaison n'est pas disponible ou exploitable, on retourne une erreur + """ + + def extract_number(message): + assert message != "" + fields = message.split() + assert fields[0].isdigit() + number = int(fields[0]) + return number + + utils.check_test_dir(test_dir) + + # Initialisation des resultats + error_number = 0 + warning_number = 0 + summary_infos = {} + files_infos = {} + + # Traitement des erreurs memorisee dans le log + log_file_path = os.path.join(test_dir, kht.COMPARISON_RESULTS_LOG) + if not os.path.isfile(log_file_path): + # Erreur speciale si pas de fichier de comparaison + error_number = 1 + summary_infos[SUMMARY_NOTE_KEY] = "The test has not been launched" + else: + try: + with open(log_file_path, "r", errors="ignore") as log_file: + lines = log_file.readlines() + except Exception as exception: + # Erreur speciale si probleme de lecture du fichier de comparaison + lines = None + error_number = 1 + summary_infos[SUMMARY_NOTE_KEY] = ( + "Unable to read file " + kht.COMPARISON_RESULTS_LOG + str(exception) + ) + # Analyse du contenu du fichier + file_pattern = "file " + if lines is not None: + index = 0 + while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + + # Analyse des lignes concernant chaque fichier avant le resume + if line.find(file_pattern) == 0: + file_path = line[len(file_pattern) :] + file_name = os.path.basename(file_path) + file_info = "" + while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + if line == "": + break + else: + if file_info != "": + file_info += "\n" + file_info += line + files_infos[file_name] = file_info + continue + + # Analyse du resume jsuq'u la fin du fichier si debut de resume trouve + if line 
== SUMMARY_TITLE: + while index < len(lines): + line = lines[index] + index += 1 + line = line.strip() + for key in ALL_SUMMARY_KEYS: + if line.find(key) >= 0: + summary_infos[key] = line + if key == SUMMARY_WARNING_KEY: + warning_number = extract_number(line) + elif key == SUMMARY_ERROR_KEY: + error_number = extract_number(line) + + # Erreur speciale si le resume n'est pas trouve + if len(summary_infos) == 0: + assert error_number == 0 + error_number = 1 + specific_message = ( + "Section '" + + SUMMARY_TITLE + + "' not found in " + + kht.COMPARISON_RESULTS_LOG + ) + summary_infos[SUMMARY_NOTE_KEY] = specific_message + # Retour des resultats + return error_number, warning_number, summary_infos, files_infos + + +def check_results(test_dir, forced_context=None): + """ + Fonction principale de comparaison des resultats de test et de reference + Les fichiers sont compares 2 a 2 et la synthese de la comparaison est ecrite + dans un fichier de log, avec un resume en fin de fichier, facile a parser + On retourne True s'il n'y a aucune erreur + + Le parametrage d'un contexte force en entree permete d'effectuer la comparaison avec + un contexte (parallel|sequential, platform) alternatif. 
Dans ce cas: + - l'objectif est essentiellement de renvoyer un indicateur global de succes de la comparaison + - on n'ecrit pas de fichier de comparaison + """ + utils.check_test_dir(test_dir) + + # Initialisation des stats de comparaison + special_error_file_error_numbers = {} + for file_name in kht.SPECIAL_ERROR_FILES: + special_error_file_error_numbers[file_name] = 0 + error_number = 0 + warning_number = 0 + user_message_warning_number = 0 + compared_files_number = 0 + error_number_in_err_txt = 0 + error_number_per_extension = {} + error_number_per_file = {} + erroneous_ref_file_lines = {} + erroneous_test_file_lines = {} + erroneous_file_names = [] + extension_message = "" + specific_message = "" + portability_message = "" + recovery_message = "" + + # Ouverture du fichier de log de comparaison, sauf si lle contexte est force + log_file = None + if forced_context is None: + log_file_path = os.path.join(test_dir, COMPARISON_LOG_FILE_NAME) + try: + log_file = open(log_file_path, "w", errors="ignore") + except Exception as exception: + print("error : unable to create log file " + log_file_path, exception) + return + assert log_file is not None + utils.write_message( + utils.test_dir_name(test_dir) + " comparison", log_file=log_file + ) + + # Information sur le contexte courant de comparaison des resultats + if forced_context is None: + current_context = results.get_current_results_ref_context() + utils.write_message( + "current comparison context : " + str(current_context), + log_file=log_file, + ) + else: + current_context = forced_context + + # Test de presence du repertoire de test a comparer + results_dir = os.path.join(test_dir, kht.RESULTS) + if not os.path.isdir(results_dir): + utils.write_message( + "error : no comparison, test directory not available (" + results_dir + ")", + log_file=log_file, + show=True, + ) + error_number = error_number + 1 + + # Recherche du repertoire courant des resultats de reference + results_ref, candidate_dirs = 
results.get_results_ref_dir( + test_dir, forced_context=forced_context, log_file=log_file, show=True + ) + if results_ref is None: + utils.write_message( + "error : invalid " + + kht.RESULTS_REF + + " dirs " + + utils.list_to_label(candidate_dirs), + log_file=log_file, + show=True, + ) + error_number = error_number + 1 + elif len(candidate_dirs) >= 2: + portability_message = ( + "used " + results_ref + " dir among " + utils.list_to_label(candidate_dirs) + ) + utils.write_message( + portability_message, + log_file=log_file, + show=True, + ) + + # Test de presence du repertoire de reference a comparer + results_ref_dir = "" + if error_number == 0: + results_ref_dir = os.path.join(test_dir, results_ref) + if not os.path.isdir(results_ref_dir): + utils.write_message( + "error : no comparison, reference directory not available (" + + results_ref_dir + + ")", + log_file=log_file, + show=True, + ) + error_number = error_number + 1 + + # Comparaison effective si possible + if error_number == 0: + # Acces aux fichiers des repertoires de reference et de test + # On passe par le format bytes des noms de fichier pour avoir acces + # aux fichier quelque soit la plateforme + # - Windows ne supporte que l'utf8 + # - Linux stocke les noms directement sous la forme de bytes + ref_byte_file_names = os.listdir(os.fsencode(results_ref_dir)) + test_byte_file_names = os.listdir(os.fsencode(results_dir)) + + # On memorise les noms de fichiers sous forme de string pour faciliter le reporting + # Tout en gardant l'association entre le nom python (utf8) et les noms en bytes + # + # Attention, la methode fsdecode utilise des 'surrogate characters' invisible + # permettant de garder trace des bytes non utf8 pour le re-encodage par fsencode si necessaire + # On passe par une version 'nettoyee' purement ascii de ces caracteres speciaux pour memoriser + # l'association entre un nom de fichier de type string et un nom de type bytes + # Dans ce cas, il suffit de memoriser dans les resultats de 
reference la + # version du nom de fichier sans bytes (valide quelque soit la plateforme) + # Pour les resultats de test, le nom peut comporter des bytes, mais on tolere + # la comparaison si sa version nettoyee est la meme que pour le fichier de reference + ref_file_names = [] + dic_ref_byte_file_names = {} + recovery = False + for byte_file_name in ref_byte_file_names: + file_name = os.fsdecode(byte_file_name) + cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") + if cleaned_file_name != file_name: + utils.write_message( + "warning : reference file name with a byte encoding (" + + str(byte_file_name) + + ") used under ascii name (" + + cleaned_file_name + + ")", + log_file=log_file, + ) + warning_number += 1 + recovery = True + ref_file_names.append(cleaned_file_name) + dic_ref_byte_file_names[cleaned_file_name] = byte_file_name + # Idem pour les resultat de test + test_file_names = [] + dic_test_byte_file_names = {} + for byte_file_name in test_byte_file_names: + file_name = os.fsdecode(byte_file_name) + cleaned_file_name = file_name.encode("ascii", "ignore").decode("ascii") + if cleaned_file_name != file_name: + utils.write_message( + "warning : test file name with a byte encoding (" + + str(byte_file_name) + + ") used under ascii name (" + + cleaned_file_name + + ")", + log_file=log_file, + ) + warning_number += 1 + recovery = True + test_file_names.append(cleaned_file_name) + dic_test_byte_file_names[cleaned_file_name] = byte_file_name + + # Message de recuperation d'erreur si necessaire + if recovery: + utils.write_message( + "\nRecovery from errors caused by byte encoding of file names in another platform", + log_file=log_file, + ) + recovery_message = utils.append_message( + recovery_message, "Recovery of type byte encoding of file names" + ) + + # On tri par nom de fichier pour ameliorer la stabilite du reporting inter plateformes + ref_file_names.sort() + test_file_names.sort() + + # Comparaison des nombres de fichiers + 
ref_result_file_number = len(ref_file_names) + test_result_file_number = len(test_file_names) + if ref_result_file_number == 0: + utils.write_message( + "error : no comparison, missing reference result files", + log_file=log_file, + show=True, + ) + error_number = error_number + 1 + elif ref_result_file_number != test_result_file_number: + utils.write_message( + "\nerror : number of results files (" + + str(test_result_file_number) + + ") should be " + + str(ref_result_file_number), + log_file=log_file, + show=True, + ) + error_number = error_number + 1 + # Affichage des noms des fichiers supplementaires + max_file_reported = 20 + if test_result_file_number > ref_result_file_number: + # Message specifique en cas de fichiers en trop + specific_message = utils.append_message( + specific_message, "additional result files" + ) + utils.write_message( + "Additional files in " + kht.RESULTS + " dir:", log_file=log_file + ) + file_reported = 0 + for file_name in test_file_names: + if file_name not in ref_file_names: + if file_reported < max_file_reported: + utils.write_message("\t" + file_name, log_file=log_file) + else: + utils.write_message("\t...", log_file=log_file) + break + file_reported += 1 + elif test_result_file_number < ref_result_file_number: + # Message specifique en cas de fichiers manquants + specific_message = utils.append_message( + specific_message, "missing result files" + ) + utils.write_message( + "Missing files in " + kht.RESULTS + " dir:", log_file=log_file + ) + file_reported = 0 + for file_name in ref_file_names: + if file_name not in test_file_names: + if file_reported < max_file_reported: + utils.write_message("\t" + file_name, log_file=log_file) + else: + utils.write_message("\t...", log_file=log_file) + break + file_reported += 1 + + # Comparaison des fichiers 2 a 2 en memorisant les erreurs par extension + for file_name in ref_file_names: + compared_files_number = compared_files_number + 1 + + # Path des fichiers utilises pour le reporting + 
ref_file_path = os.path.join(results_ref_dir, file_name) + test_file_path = os.path.join(results_dir, file_name) + + # En-tete de comparaison des fichiers + utils.write_message("\nfile " + test_file_path, log_file=log_file) + + # On utilise si possible le path des fichiers en bytes pour s'adapter aux contraintes de la plateforme + # Les erreurs seront diagnostiquees si necessaire lors de la lecture des fichiers + used_ref_file_path = ref_file_path + if dic_ref_byte_file_names.get(file_name) is not None: + used_ref_file_path = os.path.join( + os.fsencode(results_ref_dir), dic_ref_byte_file_names.get(file_name) + ) + used_test_file_path = test_file_path + if dic_test_byte_file_names.get(file_name) is not None: + used_test_file_path = os.path.join( + os.fsencode(results_dir), dic_test_byte_file_names.get(file_name) + ) + + # Lecture des fichiers + ref_file_lines = utils.read_file_lines( + used_ref_file_path, log_file=log_file + ) + test_file_lines = utils.read_file_lines( + used_test_file_path, log_file=log_file + ) + if ref_file_lines is None: + error_number = error_number + 1 + if test_file_lines is None: + error_number = error_number + 1 + + # Comparaison si ok + if ref_file_lines is not None and test_file_lines is not None: + # Cas des fichiers stdout et stderr, que l'on filtre du prefix de process id presnet en parallele + if file_name in [kht.STDOUT_ERROR_LOG, kht.STDERR_ERROR_LOG]: + ref_file_lines = utils.filter_process_id_prefix_from_lines( + ref_file_lines + ) + test_file_lines = utils.filter_process_id_prefix_from_lines( + test_file_lines + ) + + # Mise en forme specifique des messages utilisateurs (error, warning) pour les traiter + # de facon identique dans les cas des fichiers de log utilisateur et json + contains_user_messages = False + # Cas du fichier de log utilisateur + if file_name == kht.ERR_TXT: + contains_user_messages = True + # Identification des lignes de message + ref_file_lines = strip_user_message_lines(ref_file_lines) + test_file_lines = 
strip_user_message_lines(test_file_lines) + # Cas des fichiers json + elif is_file_with_json_extension(file_name): + contains_user_messages = True + # Pretraitement des lignes de message pour les mettre dans le meme format + # que pour les fichier d'erreur + ref_file_lines = strip_user_message_lines_in_json_file( + ref_file_lines + ) + test_file_lines = strip_user_message_lines_in_json_file( + test_file_lines + ) + + # Filtrage des messages specifiques au sequentiel (100th...) + if contains_user_messages: + ref_file_lines = filter_sequential_messages_lines( + ref_file_lines, log_file=log_file + ) + test_file_lines = filter_sequential_messages_lines( + test_file_lines, log_file=log_file + ) + + # Comparaison des fichiers pre-traites + errors, warnings, user_message_warnings = check_file_lines( + ref_file_path, + test_file_path, + ref_file_lines, + test_file_lines, + log_file=log_file, + ) + error_number += errors + warning_number += warnings + user_message_warning_number += user_message_warnings + + # Memorisation des statistiques par extension + if errors > 0: + erroneous_file_names.append(file_name) + error_number_per_file[file_name] = errors + erroneous_ref_file_lines[file_name] = ref_file_lines + erroneous_test_file_lines[file_name] = test_file_lines + if file_name == kht.ERR_TXT: + error_number_in_err_txt += errors + else: + _, file_extension = os.path.splitext(file_name) + error_number_per_extension[file_extension] = ( + error_number_per_extension.get(file_extension, 0) + errors + ) + + # Message synthetique de recuperation des warnng sur les message utilisateur si necessaire + if user_message_warning_number > 0: + recovery_message = utils.append_message( + recovery_message, "Recovery from varying patterns in user messages" + ) + + # Recherche des erreurs fatales, avec tentative de recuperation + # On accepte les erreurs fatales que si on ales meme en test et reference, + # et uniquement dans le cas du pattern particulier du "Batch mode failure" qui est du + # 
a des scenario n'ayant pas pu s'excuter entierement pour des raison de portabilite + fatal_error_recovery = True + for file_name in test_file_names: + # Cas d'une erreur fatale + if file_name in kht.SPECIAL_ERROR_FILES: + special_error_file_error_numbers[file_name] = ( + special_error_file_error_numbers[file_name] + 1 + ) + error_number += 1 + special_error = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name].lower() + utils.write_message( + "\n" + special_error + " : found file " + file_name, + log_file=log_file, + ) + + # La tentative de recuperation des erreurs fatales echoue si on ne respecte + # pas toutes les conditions necessaires + if file_name not in [kht.STDERR_ERROR_LOG, kht.RETURN_CODE_ERROR_LOG]: + fatal_error_recovery = False + else: + # Les fichiers doivent etre les memes + if ( + file_name in erroneous_file_names + or file_name not in ref_file_names + ): + fatal_error_recovery = False + # Test que le fichier est reduit au pattern accepte + if not fatal_error_recovery: + # Lecture des lignes du fichier + test_file_path = os.path.join(results_dir, file_name) + test_file_lines = utils.read_file_lines( + test_file_path, log_file=log_file + ) + # Pattern dans le cas de sdterr + fatal_error_pattern = ( + "fatal error : Command file : Batch mode failure" + ) + if file_name == kht.STDERR_ERROR_LOG: + if ( + len(test_file_lines) == 0 + or test_file_lines[0].strip() != fatal_error_pattern + ): + fatal_error_recovery = False + # Pattern dans le cas du code retour + return_code_error_pattern = "Wrong return code: 1 (should be 0)" + if file_name == kht.RETURN_CODE_ERROR_LOG: + if ( + len(test_file_lines) == 0 + or test_file_lines[0].strip() + != return_code_error_pattern + ): + fatal_error_recovery = False + # Message de recuperation si necessaire + if special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] > 0: + # Cas de la recuperation + if fatal_error_recovery: + error_number -= special_error_file_error_numbers[ + kht.RETURN_CODE_ERROR_LOG + ] + error_number 
-= special_error_file_error_numbers[kht.STDERR_ERROR_LOG] + special_error_file_error_numbers[kht.RETURN_CODE_ERROR_LOG] = 0 + special_error_file_error_numbers[kht.STDERR_ERROR_LOG] = 0 + utils.write_message( + "\nRecovery from fatal errors caused solely by a 'Batch mode failure' in another platform", + log_file=log_file, + ) + recovery_message = utils.append_message( + recovery_message, "Recovery of type 'Batch mode failure'" + ) + + # Ecriture des premieres lignes des fichiers d'erreur fatales ou de timeout si necessaire + for file_name in test_file_names: + if ( + file_name in kht.SPECIAL_ERROR_FILES + and special_error_file_error_numbers[file_name] > 0 + ): + # Lecture des lignes du fichier + test_file_path = os.path.join(results_dir, file_name) + test_file_lines = utils.read_file_lines( + test_file_path, log_file=log_file + ) + utils.write_message( + "\nspecial error file " + test_file_path, log_file=log_file + ) + max_print_lines = 10 + for i, line in enumerate(test_file_lines): + if i < max_print_lines: + utils.write_message("\t" + line.rstrip(), log_file=log_file) + else: + utils.write_message("\t...", log_file=log_file) + break + + # Il y a plusieurs tentatives de recuperation des erreurs pour des jeux de test ou des variation normales + # sont possibles, comme par exemple des difference sur la caucl de l'auc en cas de manque de ressource + # Ces tentatives sont implementees de facon pragmatique (code minimaliste, facile a developper et faire evoluer) + # pour automatiser l'analyse manuelle des resultats qui ete effectuee auparavant + # On ne cherche pas a ere resilient a tous les cas possibles, ni a gerer la complexite des types de recuperation + # pouvant se combiner. 
Ces methodes de recuperation ne servent parfois que pour un seul jeu de donnees, + # et il ne faut pas hesiter si besoin a simplifier certains jeux de test pour eviter qu'ils combinent + # plusieurs problemes de recuperation + + # Tentative de recuperation des erreurs si la seule difference provient du fichier de log de Khiops + # et est du a des warning en nombre variable en mode parallele, sans ecriture de rapport + if error_number > 0: + varying_warning_messages_in_err_txt_recovery = True + + # Les messages doivent n'apparaitre que dans le fichier de log + if varying_warning_messages_in_err_txt_recovery: + varying_warning_messages_in_err_txt_recovery = ( + error_number == error_number_in_err_txt + ) + + # Filtrage d'un certain type de warning pour recommencer la comaraison + if varying_warning_messages_in_err_txt_recovery: + # Acces aux lignes des fichier + ref_file_lines = erroneous_ref_file_lines.get(kht.ERR_TXT) + test_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) + + # Filtrage des lignes selon le motif en nombre variable + warning_pattern1 = "warning : Data table slice " + warning_pattern2 = " : Read data table slice interrupted by user" + filtered_ref_file_lines = [] + filtered_test_file_lines = [] + for line in ref_file_lines: + if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: + filtered_ref_file_lines.append(line) + for line in test_file_lines: + if line.find(warning_pattern1) != 0 or line.find(warning_pattern2) < 0: + filtered_test_file_lines.append(line) + + # Comparaison a nouveau des fichiers, en mode non verbeux + errors, warnings, user_message_warnings = check_file_lines( + kht.ERR_TXT, + kht.ERR_TXT, + filtered_ref_file_lines, + filtered_test_file_lines, + ) + + # Recuperation possible si plus d'erreur apres filtrage + varying_warning_messages_in_err_txt_recovery = errors == 0 + + # Recuperation effective des erreurs si possible + if varying_warning_messages_in_err_txt_recovery: + # Messages sur la recuperation + 
recovery_summary = ( + "Recovery from varying warning number in " + kht.ERR_TXT + " file only" + ) + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( + "\tall errors come from the warning in " + + kht.ERR_TXT + + " file only, du to varying number of active process number", + log_file=log_file, + ) + utils.write_message( + "\t" + str(error_number) + " errors converted to warnings", + log_file=log_file, + ) + # On transforme les erreur en warning + warning_number += error_number + error_number = 0 + # On reinitialise egalement les stats d'erreur pour les extensions concernees + error_number_in_err_txt = 0 + + # Tentative de recuperation des erreurs si la seule difference est une difference d'ordre + # des messages utilisateur (error ou warning) + if error_number > 0: + unsorted_user_messages_recovery = True + + # Verification de la repartition des nombres d'erreur + if unsorted_user_messages_recovery: + # Recherche du nombre d'erreur dans les rapport json + error_number_in_json_report_files = error_number_per_extension.get( + ".khj", 0 + ) + error_number_per_extension.get(".khcj", 0) + + # On test si le nombre total d'erreur se rapartit entre le fichier de log utilisateur + # et les rapports json + unsorted_user_messages_recovery = ( + error_number_in_err_txt == error_number_in_json_report_files + and error_number_in_err_txt + error_number_in_json_report_files + == error_number + ) + + # Analyse specifique de la sous partie des fichiers correspondant aux messages utilisateur, + # qui ont ete marque en stripant les lignes correspondantes + if unsorted_user_messages_recovery: + + def filter_record_index_from_lines(lines): + """Filtrage avance des lignes en supprimant le debut de ligne jusqu'a l'index de record""" + filtered_lines = [] + record_index_pattern = [ + "warning : Data table ", + " : Record ", + " : Field ", + ] + for input_line in lines: 
+ pos1 = utils.find_pattern_in_line(input_line, record_index_pattern) + if pos1 >= 0: + input_line = input_line[ + input_line.find(record_index_pattern[-1]) : + ] + filtered_lines.append(input_line) + return filtered_lines + + # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs + user_message_error_number = 0 + recovered_error_number = 0 + recovered_warning_number = 0 + for file_name in erroneous_file_names: + # Recherche des lignes des fichiers erronnes + test_file_lines = erroneous_test_file_lines.get(file_name) + if test_file_lines is not None: + ref_file_lines = erroneous_ref_file_lines.get(file_name) + assert ref_file_lines is not None + # Extraction des lignes stripees, qui correspond aux messages utilisateurs + test_file_lines = extract_striped_lines(test_file_lines) + ref_file_lines = extract_striped_lines(ref_file_lines) + # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur + # La comparaison se fait de facon muette, sans passer par le fichier de log + errors, warnings, user_message_warnings = check_file_lines( + file_name, + file_name, + ref_file_lines, + test_file_lines, + ) + user_message_error_number += errors + # Comparaison filtree les messages utilisateurs jusqu'aux index des records, + # qui peuvent varier d'une execution a l'autre, puis les avoir trier + test_file_lines = filter_record_index_from_lines(test_file_lines) + ref_file_lines = filter_record_index_from_lines(ref_file_lines) + test_file_lines.sort() + ref_file_lines.sort() + errors, warnings, user_message_warnings = check_file_lines( + file_name, + file_name, + ref_file_lines, + test_file_lines, + ) + recovered_error_number += errors + recovered_warning_number += warnings + + # Il faut que les erreurs ne proviennent que des messages utilisateurs + if unsorted_user_messages_recovery: + unsorted_user_messages_recovery = ( + user_message_error_number == error_number + ) + # Il faut qu'il n'y ai plus d'erreur apres tri des 
message utilisateurs + if unsorted_user_messages_recovery: + unsorted_user_messages_recovery = recovered_error_number == 0 + + # Recuperation effective des erreurs si possible + if unsorted_user_messages_recovery: + # Messages sur la recuperation + recovery_summary = "Recovery from unsorted user messages" + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( + "\tall errors come from the users messages in " + + kht.ERR_TXT + + " and in json reports, with a different order and possibly different record indexes", + log_file=log_file, + ) + utils.write_message( + "\t" + str(error_number) + " errors converted to warnings", + log_file=log_file, + ) + # On transforme les erreur en warning + warning_number += error_number + error_number = 0 + # On reinitialise egalement les stats d'erreur pour les extensions concernees + error_number_per_extension[".khj"] = 0 + error_number_per_extension[".khcj"] = 0 + error_number_in_err_txt = 0 + + # Tentative de recuperation des erreurs si la seule difference provient de la limite des ressources + # qui ne permet pas de calcul la courbe de ROC de facon exacte + if error_number > 0: + roc_curve_recovery = True + + # On verifie d'abord qu'il y a un warning correspondant dans le log utilisateur + if roc_curve_recovery: + # On doit potentiellement relire ce fichier, car ce type de message correspond + # a un motif USER qui ne genere pas d'erreur + err_file_lines = erroneous_test_file_lines.get(kht.ERR_TXT) + if err_file_lines is None: + err_file_path = os.path.join(results_dir, kht.ERR_TXT) + err_file_lines = utils.read_file_lines(err_file_path) + if err_file_lines is None: + roc_curve_recovery = False + else: + searched_warning = ( + "warning : Evaluation Selective Naive Bayes : " + + "Not enough memory to compute the exact AUC:" + + " estimation made on a sub-sample of size" + ) + roc_curve_recovery = ( + 
utils.find_pattern_in_lines(err_file_lines, [searched_warning]) >= 0 + ) + + # Comptage des erreurs pour les fichier d'evaluation au format xls + error_number_in_json_report_files = 0 + if roc_curve_recovery: + error_number_in_evaluation_xls = 0 + for file_name in erroneous_file_names: + _, file_extension = os.path.splitext(file_name) + if file_extension == ".xls" and "EvaluationReport" in file_name: + error_number_in_evaluation_xls += error_number_per_file.get( + file_name + ) + # On teste si les nombre d'erreurs se rappartis dans le fichier de log utilisateur, + # les rapports json et les fichiers d'evalauation au format xls + error_number_in_json_report_files = error_number_per_extension.get( + ".khj", 0 + ) + roc_curve_recovery = ( + error_number_in_err_txt + + error_number_in_json_report_files + + error_number_in_evaluation_xls + == error_number + ) + + # Analyse specifique des rapports json en excluant la partie lie a la courbe de ROC + roc_curve_error_number = 0 + roc_curve_warning_number = 0 + if roc_curve_recovery: + for file_name in erroneous_file_names: + _, file_extension = os.path.splitext(file_name) + if file_extension == ".khj": + # Parcours des fichiers concerne pour reanalyser leur lignes specifiques aux erreurs + test_file_lines = erroneous_test_file_lines.get(file_name) + ref_file_lines = erroneous_ref_file_lines.get(file_name) + assert test_file_lines is not None + assert ref_file_lines is not None + # Extraction des champs qui correspondent au calcul de l'AUC et des courbes de ROC + for key in ["auc", "values"]: + # Selection d'un champ selon sa valeur + selected_test_file_lines = ( + extract_key_matching_lines_in_json_file( + test_file_lines, key + ) + ) + selected_ref_file_lines = ( + extract_key_matching_lines_in_json_file(ref_file_lines, key) + ) + # Comparaison de la partie des fichiers pre-traites relative aux messages utilisateur + # La comparaison se fait de facon muette, sans passer par le ficheir de log + errors, warnings, 
user_message_warnings = check_file_lines( + file_name, + file_name, + selected_test_file_lines, + selected_ref_file_lines, + ) + roc_curve_error_number += errors + roc_curve_warning_number += warnings + + # Le recouvrement est possible si le nombre d'erreurs trouves specifiquement pour le calcul + # de l'AUC et des courbes de ROC correspond au nombre d'eerur total + assert roc_curve_error_number <= error_number_in_json_report_files + roc_curve_recovery = ( + roc_curve_error_number == error_number_in_json_report_files + ) + + # Recuperation effective des erreurs si possible + if roc_curve_recovery: + # Messages sur la recuperation + recovery_summary = "Recovery from AUC rough estimate" + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( + "\tall errors in json report file come from AUC rough estimate", + log_file=log_file, + ) + utils.write_message( + "\t" + + str(roc_curve_error_number) + + " errors in json report files converted to warnings", + log_file=log_file, + ) + utils.write_message( + "\t" + + str(error_number - roc_curve_error_number) + + " errors in evaluation xls files ignored and converted to warnings", + log_file=log_file, + ) + # On transforme les erreur en warning + warning_number += error_number + error_number = 0 + # On reinitialise egalement les stats d'erreur pour les extensions concernees + error_number_per_extension[".khj"] = 0 + error_number_per_extension[".xls"] = 0 + + # Tentative de recuperation des erreurs dans le cas tres particulier des caracteres accentues sous Windows, + # ou on observe un comportement local a la machine de developement sous Windows different de celui + # observe sur la machine Windows cloud, pourl aquelle certains fichiers sources avec caracteres + # accentues n'ont pas pu etre dezippes correctement et conduisent a des erreurs de lecture + # Dans ce cas uniquement, on tente de se comparer a une 
version linux de reference, pour laquelle + # on a le meme probleme et on observe le meme comportement + # Pas de recuperation d'erreur avancee si un contexte est force + if error_number > 0 and forced_context is None: + zip_encoding_recovery = True + + # On verifie d'abord que les conditions sont reunies + linux_context = None + if zip_encoding_recovery: + # On doit etre sous Windows + zip_encoding_recovery = results.get_context_platform_type() == "Windows" + + # Le fichier err.txt doit comporter une erreur de lecture + if zip_encoding_recovery: + read_error_pattern = ["error : File ./", " : Unable to open file ("] + err_file_path = os.path.join(results_dir, kht.ERR_TXT) + err_file_lines = utils.read_file_lines(err_file_path) + zip_encoding_recovery = err_file_lines is not None + # On doit trouver le pattern d'erreur + if zip_encoding_recovery: + line_index = utils.find_pattern_in_lines( + err_file_lines, read_error_pattern + ) + zip_encoding_recovery = line_index >= 0 + # La ligne concernee doit avoir un probleme de caracrete accentue + if zip_encoding_recovery: + erronneous_line = err_file_lines[line_index] + ascii_erronneous_line = erronneous_line.encode( + "ascii", "ignore" + ).decode("ascii") + zip_encoding_recovery = ascii_erronneous_line != erronneous_line + + # Il doit y avoir un des resultats de references specifiques pour Linux + if zip_encoding_recovery: + assert forced_context is None + windows_results_ref_dir, _ = results.get_results_ref_dir(test_dir) + linux_context = [results.get_context_computing_type(), "Linux"] + linux_results_ref_dir, _ = results.get_results_ref_dir( + test_dir, forced_context=linux_context + ) + zip_encoding_recovery = windows_results_ref_dir != linux_results_ref_dir + + # Comparaison des resultats de test avec ceux de reference sous linux + if zip_encoding_recovery: + results_ref_dir = os.path.join(test_dir, linux_results_ref_dir) + assert linux_context is not None + # Comparaison "pragmatique" entre les fichiers des 
repertoires de test et de reference + # en forcant le contexte, sans tentative de recuperation d'erreur avancee + zip_encoding_recovery = check_results( + test_dir, forced_context=linux_context + ) + + # Recuperation effective des erreurs si possible + if zip_encoding_recovery: + # Messages sur la recuperation + recovery_summary = ( + "Recovery from poor handling of accented file names by zip" + ) + recovery_message = utils.append_message(recovery_message, recovery_summary) + utils.write_message("\n" + recovery_summary + ":", log_file=log_file) + utils.write_message( + "\tcomparison for Windows test results is performed using Linux reference results", + log_file=log_file, + ) + utils.write_message( + "\t" + str(error_number) + " errors converted to warnings", + log_file=log_file, + ) + # On transforme les erreur en warning + warning_number += error_number + error_number = 0 + # On reinitialise egalement les stats d'erreur + for extension in error_number_per_extension: + error_number_per_extension[extension] = 0 + for file_name in kht.SPECIAL_ERROR_FILES: + special_error_file_error_numbers[file_name] = 0 + + # Message dedies aux fichiers speciaux + special_error_file_message = "" + for file_name in kht.SPECIAL_ERROR_FILES: + if special_error_file_error_numbers[file_name] > 0: + special_error_file_message = SUMMARY_SPECIAL_FILE_KEYS_PER_FILE[file_name] + break + + # Ecriture d'un resume synthetique + utils.write_message("\n" + SUMMARY_TITLE, log_file=log_file) + utils.write_message( + str(warning_number) + " " + SUMMARY_WARNING_KEY, log_file=log_file + ) + utils.write_message(str(error_number) + " " + SUMMARY_ERROR_KEY, log_file=log_file) + if special_error_file_message != "": + utils.write_message(special_error_file_message, log_file=log_file) + if error_number > 0: + # Tri des extensions + file_extensions = [] + for file_extension in error_number_per_extension: + file_extensions.append(file_extension) + file_extensions.sort() + # Message specifique si erreurs dans 
un seul type de fichier + if error_number_in_err_txt > 0: + extension_message = utils.append_message(extension_message, kht.ERR_TXT) + if error_number_in_err_txt == error_number: + specific_message = utils.append_message( + specific_message, "errors only in " + kht.ERR_TXT + ) + if len(file_extensions) > 0: + for file_extension in file_extensions: + extension_message = utils.append_message( + extension_message, file_extension + ) + if error_number_per_extension[file_extension] == error_number: + specific_message = utils.append_message( + specific_message, "errors only in " + file_extension + " files" + ) + # Ecriture des messages additionnels + if extension_message != "": + utils.write_message( + SUMMARY_FILE_TYPES_KEY + extension_message, log_file=log_file + ) + if specific_message != "": + utils.write_message(SUMMARY_NOTE_KEY + specific_message, log_file=log_file) + + # Ecriture d'un message additionnel lie a la portabilite + portability_message = utils.append_message(portability_message, recovery_message) + if portability_message != "": + utils.write_message( + SUMMARY_PORTABILITY_KEY + portability_message, log_file=log_file + ) + + # Affichage d'un message de fin sur la console si le contexte n'est pas force + if forced_context is None: + final_message = "--Comparison done : " + final_message += str(compared_files_number) + " files(s) compared, " + final_message += str(error_number) + " error(s), " + final_message += str(warning_number) + " warning(s)" + if special_error_file_message != "": + final_message += ", " + special_error_file_message + if recovery_message != "": + final_message += ", Recovery from errors" + print(final_message) + print(" log file: " + log_file_path + "\n") + return error_number == 0 + + +def is_file_with_json_extension(file_path): + """Test si le path d'un fichier correspond a un fichier json""" + # Recherche du fichier compare et de son extension + file_name = os.path.basename(file_path) + _, file_extension = 
os.path.splitext(file_name) + + # Extension json de base + json_file_extensions = [".json", ".khj", ".khvj", ".khcj", ".kdicj"] + # On rajoute les extension en les suffisant par "bad" pour permettre + # de gerer des tests de fichier corrects avec une extension erronnee + for extension in json_file_extensions.copy(): + json_file_extensions.append(extension + "bad") + is_json_file = file_extension in json_file_extensions + return is_json_file + + +def is_line_striped(line): + """Test si une ligne est stripee, sans caractere fin de ligne a la fin""" + return len(line) == 0 or line[-1] != "\n" + + +def strip_user_message_lines(lines): + """Renvoie la liste des lignes en ayant stripe toutes les lignes correspondant a + message utilisateur ('error' ou 'warning') + Permet ensuite de reperer simplement ces lignes dans une liste + """ + result_lines = [] + for line in lines: + if line.find("warning : ") == 0 or line.find("error : ") == 0: + line = line.strip() + result_lines.append(line) + return result_lines + + +def strip_user_message_lines_in_json_file(lines): + """Analyse d'un fichier json pour identifiant les sections 'messages' + contenant les messages utilisateur ('error' ou 'warning') + Les ligne correspondantes sont mise sous le meme format que dans le fichier + de log d'erreur en supprimant les caracteres '"' de debut et de fin + Ces lignes sont egalement stripees pour pouvoir les reperer simplement dans la + la liste de ligne retournee en sortie + + Remarque: on se base sur le formatge json en sortie des outils Khiops, + qui permet de faire une analyse simple ligne a ligne et de garder les numeros + de lignes corrects dans les message d'erreur, meme en cas de format json erronne + Une alternative par chargement direct d'un fichier json ne permettrait pas + ce type d'analyse et de diagnostic + """ + + def clean_message(message): + """Nettoyage d'une ligne de message, entre '"' et potentiellement suivi d'une ',' + Cela ne gere pas tous les cas d'encodage json, mais 
cela est suffisant la plupart du temps + """ + cleaned_message = message.strip() + # Cas d'un milieur de section, avec ',' en fin de ligne + if cleaned_message[-1] == ",": + cleaned_message = cleaned_message[1:-2] + # Cas d'une fin de section + else: + cleaned_message = cleaned_message[1:-1] + return cleaned_message + + # Recherche des lignes du fichier dans les sections "messages" + in_message_section = False + result_lines = [] + # Pretraitement des lignes + for line in lines: + # Cas ou est dans la section des message + if in_message_section: + # Detection de la fin de section + in_message_section = line.strip() != "]" + # Nettoyage des lignes dans la section message + if in_message_section: + line = clean_message(line) + # Cas hors de la section des message + else: + # Detection du debut de section + in_message_section = line.strip() == '"messages": [' + result_lines.append(line) + return result_lines + + +def discard_key_matching_lines_in_json_file(lines, pattern): + """Renvoie la sous-liste des lignes ne correspondant pas a la cle en parametre""" + result_lines = [] + searched_full_pattern = '"' + pattern + '": ' + for line in lines: + if line.strip().find(searched_full_pattern) != 0: + result_lines.append(line) + return result_lines + + +def extract_key_matching_lines_in_json_file(lines, pattern): + """Renvoie la sous-liste des lignes correspondant a la cle en parametre""" + result_lines = [] + searched_full_pattern = '"' + pattern + '": ' + for line in lines: + if line.strip().find(searched_full_pattern) == 0: + result_lines.append(line) + return result_lines + + +def extract_striped_lines(lines): + """Retourne la sous_liste des lignes stripees de la liste en entree""" + striped_lines = [] + for line in lines: + if is_line_striped(line): + striped_lines.append(line) + return striped_lines + + +def filter_sequential_messages_lines(lines, log_file=None): + """Filtrage des errors et warning sequentiel d'un ensemble de lignes + + En sequentiel, de nouveaux 
messages de type 100th ou ... + sont emis, alors qu'il sont absents en parallele + En les filtrant, on rend les versions sequentielle et parallele comparables + Retourne les ligne filtrees, avec un message dans le log sur le nombre de lignes filtrees + """ + + def is_specific_line_pair_sequential(line1, line2): + """Test si une paire de lignes correspond a un pattern de message sequentiel + Premiere ligne avec 100th, 1000th error ou warning + Seconde ligne avec '...' + """ + message_type = "" + if line1.find("warning : ") == 0: + message_type = "warning" + elif line1.find("error : ") == 0: + message_type = "error" + is_specific = message_type != "" + # La premiere ligne doit se terminer par un pattern de type '(100th warning)' + if is_specific: + line1 = line1.strip() + expected_end_line1 = "00th " + message_type + ")" + is_specific = ( + line1[len(line1) - len(expected_end_line1) :] == expected_end_line1 + ) + # La seconde ligne doit se terminer par ' : ...' + if is_specific: + is_specific = line2.find(message_type) == 0 + if is_specific: + line2 = line2.strip() + expected_end_line2 = " : ..." 
+ is_specific = ( + line2[len(line2) - len(expected_end_line2) :] == expected_end_line2 + ) + return is_specific + + result_lines = [] + filtered_line_number = 0 + # Filtrage des lignes + i = 0 + line_number = len(lines) + while i < line_number: + line = lines[i] + # On ne traite pas la derniere ligne, qui n'a pas de ligne suivante + if i == line_number - 1: + result_lines.append(line) + else: + next_line = lines[i + 1] + # On saute deux lignes si elles sont specifique a des message en sequentiel + if is_specific_line_pair_sequential(line, next_line): + i += 1 + filtered_line_number += 2 + else: + result_lines.append(line) + i += 1 + # Message si lignes filtrees + if filtered_line_number > 0: + utils.write_message( + "Specific sequential messages (100th...): " + + str(filtered_line_number) + + " lines filtered", + log_file=log_file, + ) + return result_lines + + +""" Liste de motifs pour lesquels ont admet une variation normale s'il font parti de la comparaison + dans une paire de lignes. 
Dans ce cas, on ignore la comparaison +""" +RESILIENCE_USER_MESSAGE_PATTERNS = [ + [ + "system resources are not sufficient to run the task (need ", + " of additional memory)", + ], + [ + "error : ", + "Database basic stats ", + "Too much memory necessary to store the values of the target variable ", + " (more than ", + ], + [ + "warning : Evaluation Selective Naive Bayes : Not enough memory to compute the exact AUC: " + + "estimation made on a sub-sample of size " + ], + [ + "warning : Database ", + ": Record ", + " : Single instance ", + "uses too much memory (more than ", + " after reading ", + " secondary records ", + ], + ["error : ", " : Not enough memory "], +] + + +def check_file_lines( + ref_file_path: str, + test_file_path: str, + ref_file_lines, + test_file_lines, + log_file=None, +): + """ + Comparaison d'un fichier de test et d'un fichier de reference + Parametres: + - ref_file_path: chemin du fichier de reference + - test_file_path: chemin du fichier de test + - ref_file_lines: liste des lignes du fichier de reference + - test_file_lines: liste des lignes du fichier de test + - log file: fichier de log ouvert dans le quel des messages sont ecrits (seulement si log_file est specifie) + + Retourne + - errors: nombre d'erreurs + - warnings: nombre de warnings + - user_message_warnings: nombre de warnings lie a une tolerance sur la variation des messages utilisateurs + (ex: "too much memory") + + Les noms des fichiers en parametre permettent de specialiser les comparaisons selon le type de fichier + Les listes de lignes en entree permettent d'eviter de relire un fichier dont on connait le nom + et dont on a deja lu les lignes. + Cela permet par exemple de reutiliser les methodes de comparaison apres avoir filtre le fichier + de sous-parties que l'on ne souhaite pas comparer. 
+ + Compare les fichiers ligne par ligne, champ par champ (separateur '\t'), et token par token + dans le cas des fichiers json ou dictionnaire + On a avec des tolerances selon le type de fichier. + Pour les valeurs numeriques, une difference relative de 0.00001 est toleree + - ecrit les difference dans le fichier log_file et affiche le nb d'erreur dans le terminal + - warning : 2 champs contiennent des valeurs numeriques avec une difference relative toleree + - error : les champs sont differents + """ + + def filter_time(value): + # Suppression d'un pattern de time d'une valeur + pos_start_time = value.find(" time:") + if pos_start_time >= 0: + begin_value = value[:pos_start_time] + end_value = value[pos_start_time + len(" time:") :] + end_value = end_value.strip() + pos_end_time = end_value.find(" ") + if pos_end_time >= 0: + end_value = end_value[pos_end_time:] + else: + end_value = "" + filtered_value = begin_value + " time: ..." + filter_time(end_value) + else: + filtered_value = value + return filtered_value + + def filter_khiops_temp_dir(value): + # Nettoyage de la partie temp directory d'une valeur + pos_khiops_temp_dir = value.find("~Khiops") + if pos_khiops_temp_dir >= 0: + # Recherche du debut du path du fichier + begin_pos = pos_khiops_temp_dir + while begin_pos > 0 and value[begin_pos] != " ": + begin_pos -= 1 + # Recherche de la fin du repertoire temporaire + end_pos = pos_khiops_temp_dir + while ( + end_pos < len(value) + and value[end_pos] != "/" + and value[end_pos] != "\\" + ): + end_pos += 1 + while end_pos < len(value) and ( + value[end_pos] == "/" or value[end_pos] == "\\" + ): + end_pos += 1 + # Remplacement du nom du repertoire par un nom "logique" + begin_value = value[0:begin_pos] + end_value = value[end_pos : len(value)] + # Recherche du nom de fichier en debut de la end_value qui suit le nom du repertoire temporaire + filtered_filename = "" + end_filename_pos = end_value.find(" ") + if end_filename_pos != -1: + filename = 
end_value[0:end_filename_pos] + end_value = end_value[end_filename_pos:] + else: + filename = end_value + end_value = "" + # Filtrage de l'eventuel nom de fichier en remplacant les chiffres par le pattern XXX + # pour se rendre independant des eventuels index de fichiers temporaires + pos = 0 + while pos < len(filename): + c = filename[pos] + if c != "_" and not c.isdigit(): + filtered_filename += c + else: + filtered_filename += "XXX" + while pos < len(filename): + c = filename[pos] + if c != "_" and not c.isdigit(): + filtered_filename += c + break + pos += 1 + pos += 1 + filtered_value = ( + begin_value + " KHIOPS_TMP_DIR/" + filtered_filename + end_value + ) + else: + filtered_value = value + return filtered_value + + # Verifications + assert ref_file_path != "", "Missing ref file path" + assert test_file_path != "", "Missing test file path" + assert ref_file_lines is not None, "Missing ref file lines" + assert test_file_lines is not None, "Missing test file lines" + + # Recherche du fichier compare et de son extension + file_name = os.path.basename(ref_file_path) + assert file_name == os.path.basename(test_file_path) + _, file_extension = os.path.splitext(file_name) + + # test si fichier de temps + is_time_file = file_name == kht.TIME_LOG + + # test si fichier histogramme + is_histogram_file = "histogram" in file_name and file_extension == ".log" + + # test si fichier d'erreur + is_error_file = file_name == kht.ERR_TXT + + # test si fichier de benchmark + is_benchmark_file = file_name == "benchmark.xls" + + # Test si fichier json + is_json_file = is_file_with_json_extension(file_name) + + # initialisation des nombres d'erreurs et de warning + errors = 0 + warnings = 0 + numerical_warnings = 0 # Lie a une tolerance dee difference de valeur numerique + user_message_warnings = ( + 0 # Lie a un pattern de message avec tolerance (ex: "Not enough memory") + ) + + # Pas de controle si fichier de temps + if is_time_file: + utils.write_message("OK", log_file=log_file) 
+ return errors, warnings, user_message_warnings + + # Comparaison des nombres de lignes + file_ref_line_number = len(ref_file_lines) + file_test_line_number = len(test_file_lines) + if file_test_line_number != file_ref_line_number: + utils.write_message( + "test file has " + + str(file_test_line_number) + + " lines and reference file has " + + str(file_ref_line_number) + + " lines", + log_file=log_file, + ) + errors = errors + 1 + + # comparaison ligne a ligne + max_threshold = 0 + max_print_error = 10 + max_field_length = 100 + skip_benchmark_lines = False + line_number = min(file_ref_line_number, file_test_line_number) + for index in range(line_number): + line = index + 1 + line_ref = ref_file_lines[index].rstrip() + line_test = test_file_lines[index].rstrip() + + # Cas special des fichiers de benchmark: + # on saute les blocs de ligne dont le role est le reporting de temps de calcul + # ("Time" dans le premier champ d'entete) + if is_benchmark_file and line_ref.find("Time") != -1: + skip_benchmark_lines = True + continue + if is_benchmark_file and skip_benchmark_lines: + # fin de bloc si ligne vide + if line_ref.find("\t") == -1: + skip_benchmark_lines = False + if skip_benchmark_lines: + continue + + # Ok si lignes egales + if line_ref == line_test: + continue + + # Cas special du fichier d'erreur: on tronque les lignes qui font du reporting de temps de calcul (" time:") + if ( + is_error_file + and line_ref.find(" time: ") != -1 + and line_test.find(" time: ") != -1 + ): + line_ref = filter_time(line_ref) + line_test = filter_time(line_test) + + # Cas special du fichier d'erreur: on tronque les lignes de stats sur les records des tables + if is_error_file: + record_stats_pattern = [" Table ", " Records: "] + if ( + utils.find_pattern_in_line(line_ref, record_stats_pattern) == 0 + and utils.find_pattern_in_line(line_test, record_stats_pattern) == 0 + ): + line_ref = line_ref[: line_ref.find(record_stats_pattern[-1])] + line_test = line_test[: 
line_test.find(record_stats_pattern[-1])] + + # Cas special du fichier d'erreur: + # on saute les lignes qui font du reporting de temps de calcul ("interrupted ") + if ( + is_error_file + and line_ref.lower().find(" interrupted ") != -1 + and line_test.lower().find(" interrupted ") != -1 + ): + continue + + # Cas special du fichier d'erreur, pour le message "(Operation canceled)" qui n'est pas case sensitive + if is_error_file: + if line_ref.find("(Operation canceled)") != -1: + line_ref = line_ref.replace( + "(Operation canceled)", "(operation canceled)" + ) + if line_test.find("(Operation canceled)") != -1: + line_test = line_test.replace( + "(Operation canceled)", "(operation canceled)" + ) + + # Cas special du fichier d'erreur en coclustering: + # on saute les lignes d'ecriture de rapport intermediaire qui different par le temps + # ("Write intermediate coclustering report") + if ( + is_error_file + and line_ref.find("Write intermediate coclustering report") != -1 + and line_test.find("Write intermediate coclustering report") != -1 + ): + continue + + # Cas special du fichier d'histogramme: + # on tronque les lignes qui font du reporting de temps de calcul (" time\t") + if ( + is_histogram_file + and line_ref.find("time") != -1 + and line_test.find("time") != -1 + ): + line_ref = line_ref[: line_ref.find("time")] + line_test = line_test[: line_test.find("time")] + # Cas special du fichier d'histogramme: + # on ignore les ligne avec le numero de version + if ( + is_histogram_file + and line_ref.find("Version") != -1 + and line_test.find("Version") != -1 + ): + continue + + # Cas special du caractere # en tete de premiere ligne de fichier + # pour l'identifiant de version d'application (ex: #Khiops 10.2.0) + tool_version_pattern = ["#", " "] + if ( + line == 1 + and utils.find_pattern_in_line(line_ref, tool_version_pattern) == 0 + and utils.find_pattern_in_line(line_test, tool_version_pattern) == 0 + ): + continue + + # Cas special du champ version des fichiers 
json (identifiant de version d'application) + if ( + is_json_file + and line_ref.find('"version": ') >= 0 + and line_test.find('"version": ') >= 0 + ): + continue + + # Traitement des patterns toleres pour la comparaison + if is_error_file or is_json_file: + resilience_found = False + for pattern in RESILIENCE_USER_MESSAGE_PATTERNS: + if ( + utils.find_pattern_in_line(line_ref, pattern) != -1 + and utils.find_pattern_in_line(line_test, pattern) != -1 + ): + # On renvoie un warning, en indiquant qu'il s'agit d'un warning de resilience + warnings += 1 + user_message_warnings += 1 + # Ecriture d'un warning + utils.write_message( + "warning : line " + + str(line) + + " " + + line_test.strip() + + " -> " + + line_ref.strip(), + log_file=log_file, + ) + resilience_found = True + break + if resilience_found: + continue + + # Sinon, on analyse les champs + line_fields_ref = line_ref.split("\t") + line_fields_test = line_test.split("\t") + + # comparaison des nombres de champs + field_number_ref = len(line_fields_ref) + field_number_test = len(line_fields_test) + if field_number_ref != field_number_test: + if errors < max_print_error: + utils.write_message( + "test file (line " + + str(line) + + ") has " + + str(field_number_test) + + " columns and reference file has " + + str(field_number_ref) + + " columns", + log_file=log_file, + ) + elif errors == max_print_error: + utils.write_message("...", log_file=log_file) + errors = errors + 1 + + # comparaison des champs + field_number_length = min(field_number_ref, field_number_test) + for i in range(field_number_length): + field_ref = line_fields_ref[i] + field_test = line_fields_test[i] + + # parcours des lignes champ par champs + # cas special du fichier d'erreur ou json: on tronque les chemins vers les repertoires temporaires de Khiops + if ( + (is_error_file or is_json_file) + and field_ref.find("~Khiops") != -1 + and field_test.find("~Khiops") != -1 + ): + field_ref = filter_khiops_temp_dir(field_ref) + field_test = 
filter_khiops_temp_dir(field_test) + + # cas general de comparaison de champs + [eval_res, threshold_res] = check_field(field_ref, field_test) + + # truncature des champs affiches dans les messages d'erreur + if len(field_test) > max_field_length: + field_test = field_test[0:max_field_length] + "..." + if len(field_ref) > max_field_length: + field_ref = field_ref[0:max_field_length] + "..." + # messages d'erreur + if eval_res == 0: + if errors < max_print_error or threshold_res > max_threshold: + utils.write_message( + "line " + + str(line) + + " field " + + str(i + 1) + + " " + + field_test + + " -> " + + field_ref, + log_file=log_file, + ) + elif errors == max_print_error: + utils.write_message("...", log_file=log_file) + errors += 1 + elif eval_res == 2: + warnings += 1 + if threshold_res > 0: + numerical_warnings += 1 + max_threshold = max(threshold_res, max_threshold) + if warnings > 0: + if numerical_warnings > 0: + utils.write_message( + str(numerical_warnings) + " warning(s) (epsilon difference)", + log_file=log_file, + ) + if user_message_warnings > 0: + utils.write_message( + str(user_message_warnings) + + " warning(s) (resilience to specific user message patterns)", + log_file=log_file, + ) + if errors == 0: + utils.write_message("OK", log_file=log_file) + if errors > 0: + message = str(errors) + " error(s)" + if max_threshold > 0: + message += " (max relative difference: " + str(max_threshold) + ")" + utils.write_message(message, log_file=log_file) + return errors, warnings, user_message_warnings + + +def split_field(field_value): + """Decoupage d'un champ (champ d'une ligne avec separateur tabulation) + en un ensemble de tokens elementaire pour le parsing d'un fichier json ou kdic + Permet ensuite de comparer chaque valeur de token, pour avoir une tolerance par rapport aux + mirco-variations des valeurs numeriques""" + # Pour gerer les double-quotes a l'interieur des strings, pour les format json et kdic + field_value = field_value.replace('\\"', "'") 
+ field_value = field_value.replace('""', "'") + sub_fields = TOKEN_PARSER.findall(field_value) + return sub_fields + + +def is_time(val): + """Indique si une valeur est de type temps hh:mm:ss.ms""" + return TIME_PARSER.match(val.strip()) + + +def check_value(val1, val2): + """Comparaison de deux valeurs numeriques + Renvoie deux valeur: + - result: + - 1 si les valeurs sont identiques + - 2 si les la difference relative est toleree + - 0 si les valeurs sont differentes + - threshold: difference relative si result = 2 + """ + # Ok si valeurs egales + if val1 == val2: + return [1, 0] + # Sinon, tentative de comparaison numerique + threshold = float(0.00001) + try: + float1 = float(val1) + float2 = float(val2) + res = ( + 0.5 * abs(float1 - float2) / (abs(float1) / 2 + abs(float2) / 2 + threshold) + ) + if res <= threshold: + return [2, res] + return [0, res] + # Erreur si format non numerique et difference + except ValueError: + return [0, 0] + + +def check_field(field1, field2): + """ " Comparaison de deux champs + Pour les valeurs numeriques, une diffence relative de 0.00001 est toleree + Renvoie deux valeur: + - result: + - 1 si les champs sont identiques + - 2 si les la difference relative est toleree (warning) + - 0 si les champs sont differents (error) + - threshold: difference relative liee au cas erreur ou warning + """ + if field1 == field2: + return [1, 0] + + # si les deux champs sont des time, on renvoie OK pour ignorer la comparaison + if is_time(field1) and is_time(field2): + return [1, 0] + + # uniformisation entre windows et linux pour les chemins de fichier + # on va remplacer les \ par des / + string1 = field1.replace("\\", "/") + string2 = field2.replace("\\", "/") + # Tolerance temporaire pour le passage au format hdfs + # hdfs_value1 = field1.replace("./", "") + # hdfs_value1 = hdfs_value1.replace(".\\/..\\/", "") + # hdfs_value1 = hdfs_value1.replace("..\\/", "") + # hdfs_value1 = hdfs_value1.replace(".\\/", "") + # hdfs_value2 = 
field2.replace("./", "") + # hdfs_value2 = hdfs_value2.replace(".\\/..\\/", "") + # hdfs_value2 = hdfs_value2.replace("..\\/", "") + # hdfs_value2 = hdfs_value2.replace(".\\/", "") + # if hdfs_value1 == hdfs_value2: + # return [1, 0] + if string1 == string2: + return [1, 0] + + # sinon c'est peut etre un probleme d'arrondi + # on accepte les differences relatives faibles + if NUMERIC_PARSER.match(field1) and NUMERIC_PARSER.match(field2): + [eval_result, threshold_result] = check_value(field1, field2) + return [eval_result, threshold_result] + else: + # on arrive pas a le convertir en float, ce n'est pas un nombre + # on decoupe chaque champ sous la forme d'un ensemble de sous-chaines qui sont soit + # des libelles, soit des float + sub_fields1 = split_field(field1) + sub_fields2 = split_field(field2) + + # nombre de sous-chaines differentes: il y a erreur + if len(sub_fields1) != len(sub_fields2): + return [0, 0] + # comparaison pas a pas + else: + i = 0 + length = len(sub_fields1) + warnings = 0 + errors = 0 + max_warning_threshold = 0 + max_error_threshold = 0 + while i < length: + [eval_result, threshold_result] = check_value( + sub_fields1[i], sub_fields2[i] + ) + # Traitement des erreurs + if eval_result == 0: + errors += 1 + max_error_threshold = max(threshold_result, max_error_threshold) + # Traitement des warnings + if eval_result == 2: + warnings += 1 + max_warning_threshold = max(threshold_result, max_warning_threshold) + i = i + 1 + if errors > 0: + return [0, max_error_threshold] + elif warnings > 0: + return [2, max_warning_threshold] + else: + return [1, 0] + + +def initialize_parsers(): + """Initialisation de parsers sont compile une fois pour toutes + Retourne les parsers de token, de numeric et de time + """ + # Delimiters pour les fichiers json et kdic + delimiters = [ + "\\,", + "\\{", + "\\}", + "\\[", + "\\]", + "\\:", + "\\(", + "\\)", + "\\<", + "\\>", + "\\=", + ] + numeric_pattern = "-?[0-9]+\\.?[0-9]*(?:[Ee]-?[0-9]+)?" 
+ string_pattern = ( + '"[^"]*"' # Sans les double-quotes dans les strings (dur a parser...) + ) + time_pattern = "\\d{1,2}:\\d{2}:\\d{2}\\.?\\d*" + other_tokens = "[\\w]+" + tokens = time_pattern + "|" + numeric_pattern + "|" + string_pattern + for delimiter in delimiters: + tokens += "|" + delimiter + tokens += "|" + other_tokens + token_parser = re.compile(tokens) + numeric_parser = re.compile(numeric_pattern) + time_parser = re.compile(time_pattern) + return token_parser, numeric_parser, time_parser + + +# Parsers en variables globales, compiles une seule fois au chargement du module +# - le parser de tokens permet d'analyser de facon detaillee le contenu d'un +# fichier json ou dictionnaire (.kdic) en le decomposant en une suite de tokens +# separateur, valeur numerique opu categorielle entre double-quotes. +# - le parser de numerique est specialise pour les valeurs numeriques au format scientifique +# - le parser de time est specialise pour le format time hh:mm:ss.ms +TOKEN_PARSER, NUMERIC_PARSER, TIME_PARSER = initialize_parsers() diff --git a/test/LearningTestTool/py/_kht_constants.py b/test/LearningTestTool/py/_kht_constants.py index 66e888614..47f88d292 100644 --- a/test/LearningTestTool/py/_kht_constants.py +++ b/test/LearningTestTool/py/_kht_constants.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + """ Constantes permettant la gestion de la structure des repertoires de LearningTest et l'analyse des resultats par repertoire de test diff --git a/test/LearningTestTool/py/_kht_families.py b/test/LearningTestTool/py/_kht_families.py index f45ab07e6..ad1bbf4df 100644 --- a/test/LearningTestTool/py/_kht_families.py +++ b/test/LearningTestTool/py/_kht_families.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. 
+# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + import os import _kht_constants as kht diff --git a/test/LearningTestTool/py/_kht_one_shot_instructions.py b/test/LearningTestTool/py/_kht_one_shot_instructions.py index 389198c7f..48b703722 100644 --- a/test/LearningTestTool/py/_kht_one_shot_instructions.py +++ b/test/LearningTestTool/py/_kht_one_shot_instructions.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + import os.path import sys import stat diff --git a/test/LearningTestTool/py/_kht_results_management.py b/test/LearningTestTool/py/_kht_results_management.py index 7b135f494..ca7ef853e 100644 --- a/test/LearningTestTool/py/_kht_results_management.py +++ b/test/LearningTestTool/py/_kht_results_management.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + import os import platform diff --git a/test/LearningTestTool/py/_kht_standard_instructions.py b/test/LearningTestTool/py/_kht_standard_instructions.py index 798952ee7..2a6871741 100644 --- a/test/LearningTestTool/py/_kht_standard_instructions.py +++ b/test/LearningTestTool/py/_kht_standard_instructions.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. 
+ import os.path import stat diff --git a/test/LearningTestTool/py/_kht_utils.py b/test/LearningTestTool/py/_kht_utils.py index 99cb01f76..34a1fa020 100644 --- a/test/LearningTestTool/py/_kht_utils.py +++ b/test/LearningTestTool/py/_kht_utils.py @@ -1,656 +1,662 @@ -import os -import os.path -import sys -import shutil -import stat -import argparse - -import _kht_constants as kht -import _kht_families as test_families - - -""" -Fonction utilitaires, notamment pour la gestion des fichiers et des messages -""" - -""" -Verification de la typologie des repertoires dans LearningTest -- test dir: repertoire d'un test elementaire (ex: `IrisLight`) -- suite dir: repertoire d'une famille de test, contenant un sous-repertoire par test (ex: Standard) -- tool dir: repertoire pour un outil, contenant un sous-repertoire par suite de test: (ex: TestKhiops) -- home dir: repertoire LearningTest, contenant les tool dirs, designe en externe par 'LearningTest dir' - -Les methodes suivante verifie qu'un path, relatif ou absolu, se termine par un repertoire de la typologie. 
-En cas d'erreur, un message est affiche est on sort du programme -""" - - -def check_test_dir(checked_dir): - """Test si un chemin est celui d'un repertoire de test""" - checked_home_dir_path = parent_dir_path(checked_dir, 3) - checked_tool_dir_name = parent_dir_name(checked_dir, 2) - if ( - not check_home_dir(checked_home_dir_path) - or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() - ): - fatal_error(checked_dir + " should be a test directory of " + kht.LEARNING_TEST) - return True - - -def check_suite_dir(checked_dir): - """Test si un chemin est celui d'un repertoire de suite""" - checked_home_dir_path = parent_dir_path(checked_dir, 3) - checked_tool_dir_name = parent_dir_name(checked_dir, 1) - if ( - not check_home_dir(checked_home_dir_path) - or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() - ): - fatal_error( - checked_dir + " should be a suite directory of " + kht.LEARNING_TEST - ) - return True - - -def check_tool_dir(checked_dir): - """Test si un chemin est celui d'un repertoire d'outil""" - checked_home_dir_path = parent_dir_path(checked_dir, 3) - checked_tool_dir_name = parent_dir_name(checked_dir, 0) - if ( - not check_home_dir(checked_home_dir_path) - or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() - ): - fatal_error(checked_dir + " should be a tool directory of " + kht.LEARNING_TEST) - return True - - -def check_home_dir(checked_dir, fatal_error_if_false=True): - """Test si un chemin est celui du repertoire LearningTest""" - checked_home_dir_path = parent_dir_path(checked_dir, 0) - # On n'impose pas que le repertoire racine ait le nom predefini kht.LEARNING_TEST - # On verifie juste que le repertoire contient au moins un des repertoires d'outil - for name in kht.TOOL_DIR_NAMES.values(): - checked_tool_dir_name = os.path.join(checked_home_dir_path, name) - if os.path.isdir(checked_tool_dir_name): - return True - # Echec si aucun repertoire d'outil trouve - if fatal_error_if_false: - fatal_error( - checked_dir - + " 
should be a valid '" - + kht.LEARNING_TEST - + "' home dir, containing at least one the tools directory " - + list_to_label(kht.TOOL_DIR_NAMES.values()) - ) - return False - - -def get_learning_test_sub_dir_depth(checked_dir): - """Test si un chemin est correspond a un sous-repertoire de LearningTest - Renvoie la profondeur a laquelle se trouver LearningTest - - 0: home dir - - 1: tool dir - - 2: suite dir - - 3: test dir - """ - if not os.path.isdir(checked_dir): - fatal_error(checked_dir + " should be a directory") - checked_home_dir_path = os.path.realpath(checked_dir) - depth = 0 - while depth < 4: - if check_home_dir(checked_home_dir_path, fatal_error_if_false=False): - return depth - checked_home_dir_path = os.path.dirname(checked_home_dir_path) - depth += 1 - fatal_error( - checked_dir - + " must be in a directory tree located a maximum of three levels above a valid '" - + kht.LEARNING_TEST - + "' home dir, containing at least one the tools directory " - + list_to_label(kht.TOOL_DIR_NAMES.values()) - ) - - -def get_home_dir(home_dir): - """Retourne le repertoire de base LearningTest a partir d'un sous-repertoire de profondeur quelconque""" - # On remonte dans le chemin (reel) jusqu'a trouver le repertoire racine - checked_home_dir_path = os.path.realpath(home_dir) - depth = 0 - while depth < 4: - if check_home_dir(checked_home_dir_path, fatal_error_if_false=False): - return checked_home_dir_path - checked_home_dir_path = os.path.dirname(checked_home_dir_path) - depth += 1 - assert False, ( - "No valid '" + kht.LEARNING_TEST + "' home dir found in path " + home_dir - ) - - -def test_dir_name(test_dir): - """Nom du repertoire de test a partir du chemin repertoire de test""" - return parent_dir_name(test_dir, 0) - - -def suite_dir_name(test_dir): - """Nom du repertoire de suite a partir du chemin repertoire de test""" - return parent_dir_name(test_dir, 1) - - -def tool_dir_name(test_dir): - """Nom du repertoire d'outil a partir du chemin repertoire de test""" - 
return parent_dir_name(test_dir, 2) - - -def dir_name(dir_path): - """Renvoie le nom reel du repertoire de base d'un chemin, meme si le chemin est relatif""" - return parent_dir_name(dir_path, 0) - - -def parent_dir_path(dir_path, depth): - """Renvoie le chemin d'un repertoire parent a une profondeur donnee - Le nom est le nom reel absolu, meme si le parametre en entree est un chemin relatif - Ainsi, utiliser depth=0 permet d'avoir le nom reel du repertoire de base dans tous les cas - - Example: pour un test path dir_path=/LearningTest/TestKhiops/Standard/Iris/. - - test dir: parent_dir_name(dir_path, 0) -> Iris - - suite dir: parent_dir_name(dir_path, 1) -> Standard - - tool dir: parent_dir_name(dir_path, 2) -> TestKhiops - - home dir: parent_dir_name(dir_path, 3) -> LearningTest - """ - if not os.path.isdir(dir_path): - fatal_error(dir_path + " should be a valid directory") - # Recherche du parent avec un chemin relatif - relative_parent_path = dir_path - for i in range(depth): - relative_parent_path += "/.." - # Nom reel du chemin - real_parent_path = os.path.realpath(relative_parent_path) - return real_parent_path - - -def parent_dir_name(dir_path, depth): - """Renvoie le nom d'un repertoire parent a une profondeur donnee - Le nom est le nom reel absolu, meme si le parametre en entree est un chemin relatif - Ainsi, utiliser depth=0 permet d'avoir le nom reel du repertoire de base dans tous les cas - - Example: pour un test path dir_path=/LearningTest/TestKhiops/Standard/Iris/. 
- - test dir: parent_dir_name(dir_path, 0) -> Iris - - suite dir: parent_dir_name(dir_path, 1) -> Standard - - tool dir: parent_dir_name(dir_path, 2) -> TestKhiops - - home dir: parent_dir_name(dir_path, 3) -> LearningTest - """ - parent_path = parent_dir_path(dir_path, depth) - # On extrait le nom du repertoire - result_name = os.path.basename(parent_path) - return result_name - - -""" -Gestion des noms des binaires -""" - - -def is_valid_tool_full_exe_name(tool_exe_name): - """Indique si le nom du binaire fait partie des noms valides, avec prise - en compte des suffixes mpi pour les exe paralellisable""" - - return ( - tool_exe_name in kht.TOOL_EXE_NAMES.values() - or extract_tool_exe_name(tool_exe_name) in kht.PARALLEL_TOOL_NAMES - ) - - -def extract_tool_exe_name(tool_full_exe_name): - """Extrait le nom du binaire a partir d'un nom ayant potentiellement un suffixe mpi""" - - if tool_full_exe_name in kht.TOOL_EXE_NAMES.values(): - return tool_full_exe_name - for suffix in kht.TOOL_MPI_SUFFIXES: - if tool_full_exe_name.endswith(suffix): - return tool_full_exe_name.removesuffix(suffix) - - -""" -Gestion des messages utilisateurs -""" - - -def fatal_error(message): - """Affichage du message lie a l'erreur puis quit""" - print("error : " + message) - exit(1) - - -def write_message(message, log_file=None, show=False): - """Ecriture d'un message dans un fichier de log - Ecriture dans un fichier de log selon le le parametre log_file - Affichage sur la console selon le parametre show - Si ni log_file, ni show ne sont specifier, la methode est en mode silencieux - """ - cleaned_message = message.encode(encoding="utf-8", errors="ignore").decode( - encoding="utf-8" - ) - if show: - print(cleaned_message) - # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - if log_file is not None: - log_file.write(cleaned_message + "\n") - - -def append_message(initial_messages, message): - """Ajout d'un message a un message existant, en 
ajoutant si necessaire ', ' - pour separer les messages si les deux sont non vides - Retourne un message complete du nouveau message""" - if message == "": - return initial_messages - elif initial_messages == "": - return message - else: - return initial_messages + ", " + message - - -def list_to_label(value_list): - """Creation d'un libelle a partir de l'ensemble des valeurs d'une liste - sous la forme '(value1, value2..., valuek)'""" - label = "" - for value in value_list: - if label != "": - label += ", " - label += str(value) - label = "(" + label + ")" - return label - - -""" -Gestion des arguments commun de la ligne de commande, pour la librairie argparse -""" - - -def get_formatter_class(script_name): - """Variante de la classe de formatage a utiliser pour la creation des ArgumentParser - - pour montrer une option par ligne, en indentant legerement l'espace des noms des options - - pour permettre des aide multi-lignes, notamment dans l'epilogue - (je ne sais pas pourquoi il faut pas passer par un lambda expression: pas grave) - """ - argument_parser_formatter_class = lambda prog: argparse.RawTextHelpFormatter( - script_name, max_help_position=26 - ) - return argument_parser_formatter_class - - -def argument_parser_add_source_argument(parser): - """Ajout de l'argument source, sous repertoire dans l'arborescence LearningTest""" - parser.add_argument( - "source", - help="source directory, sub-dir in a " + kht.LEARNING_TEST + " tree", - ) - - -def argument_parser_add_dest_argument(parser): - """Ajout de l'argument dest, repertoire devant contenir une sous-partie d'une arborescence LearningTest""" - parser.add_argument( - "dest", - help="destination directory that contains the output " - + kht.LEARNING_TEST - + " tree", - ) - - -def argument_parser_add_family_argument(parser): - """Ajout de l'argument de famile de suites""" - parser.add_argument( - "-f", - "--family", - help="family of test suites among " - + ", ".join(test_families.TEST_FAMILIES) - + " (default: 
" - + test_families.FULL - + ")", - choices=test_families.TEST_FAMILIES, - default=test_families.FULL, - metavar="name", - action="store", - ) - - -def argument_parser_add_processes_argument(parser): - """Ajout de l'argument du nombre de process""" - parser.add_argument( - "-p", - "--processes", - help="number of processes (default: 1)", - dest="n", - type=int, - default=1, - metavar="n", - action="store", - ) - - -def argument_parser_add_forced_platform_argument(parser): - """Ajout de l'argument de plateforme forcee en remplacement de la plateforme courante""" - parser.add_argument( - "--forced-platform", - help="platform " - + list_to_label(kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM]) - + " used to compare results (default: current platform)", - choices=kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM], - metavar="plt", - action="store", - ) - - -def argument_parser_add_limit_test_time_arguments(parser): - """Ajout des argument de limite des temps de test""" - parser.add_argument( - "--min-test-time", - help="only for test dirs where reference test time (in file " - + kht.TIME_LOG - + ") is beyond a threshold", - type=float, - metavar="t", - action="store", - ) - parser.add_argument( - "--max-test-time", - help="only for test dirs where reference test time (in file " - + kht.TIME_LOG - + ") is below a threshold", - type=float, - metavar="t", - action="store", - ) - - -def argument_parser_check_source_argument(parser, source): - """Verification de l'argument source, a appeler apres les verification standard de parse_args() - On renvoie la decomposition du repertoire source sous la forme des champs: - home_dir, tool_dir_name, suite_dir_name, test_dir_name - """ - # Verification du repertoire a tester - learning_test_depth = get_learning_test_sub_dir_depth(source) - source_tool_dir_name = None - source_suite_dir_name = None - source_test_dir_name = None - if learning_test_depth == 3: - source_tool_dir_name = parent_dir_name(source, 2) - source_suite_dir_name = 
parent_dir_name(source, 1) - source_test_dir_name = parent_dir_name(source, 0) - elif learning_test_depth == 2: - source_tool_dir_name = parent_dir_name(source, 1) - source_suite_dir_name = parent_dir_name(source, 0) - elif learning_test_depth == 1: - source_tool_dir_name = parent_dir_name(source, 0) - if ( - source_tool_dir_name is not None - and source_tool_dir_name not in kht.TOOL_DIR_NAMES.values() - ): - parser.error( - "argument source: " - + source_tool_dir_name - + " in " - + os.path.realpath(source) - + " should be a tool dir " - + list_to_label(kht.TOOL_DIR_NAMES.values()) - ) - source_home_dir = get_home_dir(source) - return ( - source_home_dir, - source_tool_dir_name, - source_suite_dir_name, - source_test_dir_name, - ) - - -def argument_parser_check_destination_dir(parser, source_home_dir, destination_dir): - """Test si un chemin peut servir a aboutir a un repertoire racine - Il ne doit pas etre un sous-repertoire du repertoire de base""" - check_home_dir(source_home_dir) - home_dir = os.path.realpath(source_home_dir) - target_dir = os.path.realpath(destination_dir) - if (target_dir + os.path.sep).find(home_dir + os.path.sep) >= 0: - parser.error( - "argument dest: " - "destination dir " - + destination_dir - + " must not in the directory tree " - + home_dir - ) - - -def argument_parser_check_processes_argument(parser, processes): - """Verification de l'argument processes, a appeler apres les verification standard de parse_args()""" - max_process_number = 128 - if processes < 1: - parser.error("argument -p/--processes: min value is 1") - elif processes > max_process_number: - parser.error("argument -p/--processes: max value is " + str(max_process_number)) - - -def argument_parser_check_limit_test_time_arguments( - parser, min_test_time, max_test_time -): - """Verification des arguments de limites des temps de test, - a appeler apres les verification standard de parse_args()""" - if min_test_time is not None and min_test_time < 0: - 
parser.error("argument --min-test-time must be positive") - if max_test_time is not None and max_test_time < 0: - parser.error("argument --max-test-time must be positive") - - -""" -Gestion du contenu d'un fichier -""" - - -def read_file_lines(file_path, log_file=None, show=False): - """Chargement en memoire des lignes d'un fichier - Retourne la liste des fichiers si ok, None sinon - Ecrit un message dans le log en cas d'erreur - """ - # lecture des lignes du fichier - try: - with open(file_path, "r", errors="ignore") as file: - file_lines = file.readlines() - except BaseException as exception: - write_message( - "Error : can't open file " + file_path + " (" + str(exception) + ")", - log_file=log_file, - show=show, - ) - file_lines = None - return file_lines - - -def write_file_lines( - file_path, file_lines, striped_lines_suffix="\n", log_file=None, show=False -): - """Ecriture d'une liste de ligne dans un fichier - Ajoute un suffix aux lignes sans caractere fin de ligne - Ecrit un message dans le log en cas d'erreur - """ - # lecture des lignes du fichier - try: - with open(file_path, "w", errors="ignore") as file: - for line in file_lines: - file.write(line) - if len(line) == 0 or line[-1] != "\n": - file.write(striped_lines_suffix) - except BaseException as exception: - write_message( - "Error : can't open output file " + file_path + " (" + str(exception) + ")", - log_file=log_file, - show=show, - ) - - -""" -Gestion de patterns dans un fichier -Un patterns est une liste de sous-chaines devant se trouver en sequance dans une ligne -Exemples: -- la ligne "warning : unable to open file" contient le pattern elementaire ["warning"] - et le pattern complexe ["warning", "file"] -""" - - -def find_pattern_in_line(line, pattern): - """Renvoie la position de la premiere sous-chaine d'un pattern si une ligne contient un pattern - Retourne -1 sinon""" - assert isinstance(pattern, list) - pos = 0 - first_pos = None - for sub_pattern in pattern: - pos = 
line[pos:].find(sub_pattern) - if first_pos is None: - first_pos = pos - if pos == -1: - return -1 - if first_pos is None: - return -1 - else: - return first_pos - - -def find_pattern_in_lines(lines, pattern): - """Recherche d'un pattern un ensemble de lignes - Renvoie l'index de la premiere ligne contenant le pattern, -1 sinon""" - assert isinstance(lines, list) - assert isinstance(pattern, list) - for i, line in enumerate(lines): - if find_pattern_in_line(line, pattern) != -1: - return i - return -1 - - -def filter_lines_with_pattern(lines, pattern): - """Retourne les lignes sans celles contenant le pattern en parametre""" - assert isinstance(lines, list) - assert isinstance(pattern, list) - output_lines = [] - for line in lines: - if find_pattern_in_line(line, pattern) == -1: - output_lines.append(line) - return output_lines - - -def filter_copyright_lines(lines): - """Retourne les lignes sans les lignes de copyright, presentes en mode UI""" - assert isinstance(lines, list) - output_lines = lines - is_copyright = False - if len(lines) >= 2: - is_copyright = ( - find_pattern_in_line(lines[1], ["(c)", "Orange - All rights reserved."]) - != -1 - ) - if is_copyright: - output_lines = lines[2:] - return output_lines - - -def filter_process_id_prefix_from_lines(lines): - """Retourne les lignes sans l'eventuel prefixe de process id, du type '[0] ' - qui est emis par mpiexce dans les sorties standard""" - output_lines = [] - for line in lines: - # En parallelle, une ligne vide peut contenir le numero du process entre crochets - pos_end = -1 - is_process_id = len(line) > 0 and line[0] == "[" - if is_process_id: - pos_end = line.find("]") - is_process_id = pos_end > 0 and line[1:pos_end].isdigit() - if is_process_id: - line = line[pos_end + 1 :].lstrip() - output_lines.append(line) - return output_lines - - -def filter_empty_lines(lines): - """Retourne les lignes sans les lignes vides""" - output_lines = [] - for line in lines: - if line.strip() != "": - 
output_lines.append(line) - return output_lines - - -""" -Gestion des fichiers et repertoires -""" - - -def copy_file(src_file_path, dest_file_path): - """Copie d'un fichier, avec message d'erreur""" - try: - shutil.copy(src_file_path, dest_file_path) - except BaseException as message: - print("can't copy " + src_file_path + " (" + str(message) + ")") - - -def remove_file(file_path): - """Suppression d'un fichier, avec message d'erreur""" - try: - os.chmod(file_path, stat.S_IWRITE) - os.remove(file_path) - except (IOError, os.error) as why: - print("Cannot remove file %s: %s" % (file_path, str(why))) - - -def make_dir(dest_dir): - """Creation d'un repertoire, avec message d'erreur""" - try: - os.mkdir(dest_dir) - except (IOError, os.error) as why: - print("Cannot create directory %s: %s" % (dest_dir, str(why))) - - -def remove_dir(dir_to_remove): - """Suppression d'un repertoire cense etre vide, avec message d'erreur""" - try: - os.rmdir(dir_to_remove) - except (IOError, os.error) as why: - print("Cannot remove directory %s: %s" % (dir_to_remove, str(why))) - - -def sub_dirs(source_dir): - """Renvoie la liste des sous-repertoire d'un repertoire, sans message d'erreur""" - result_sub_dirs = [] - if os.path.isdir(source_dir): - try: - list_dir = os.listdir(source_dir) - for name in list_dir: - if os.path.isdir(os.path.join(source_dir, name)): - result_sub_dirs.append(name) - except (IOError, os.error): - pass - return result_sub_dirs - - -def set_flushed_outputs(): - """Flush systematique des sorties standard et d'erreur""" - sys.stdout = Unbuffered(sys.stdout) - sys.stderr = Unbuffered(sys.stderr) - - -class Unbuffered: - """Pour ouvrir un fichier avec un flush systematique - usage: par exemple, appeler sys.stdout = Unbuffered(sys.stdout) pour que toutes les sorties standard - soit immediatement affichees dans le shell, sans bufferisation - """ - - def __init__(self, stream): - self.stream = stream - - def write(self, data): - # on encode en utf-8 en ignorant les 
erreurs pour eviter un erreur lors de l'encodage automatique - self.stream.write(data.encode("utf-8", "ignore").decode("utf-8")) - self.stream.flush() - - def writelines(self, datas): - # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique - self.stream.writelines( - [data.encode("utf-8", "ignore").decode("utf-8") for data in datas] - ) - self.stream.flush() - - def __getattr__(self, attr): - return getattr(self.stream, attr) +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +import os +import os.path +import sys +import shutil +import stat +import argparse + +import _kht_constants as kht +import _kht_families as test_families + + +""" +Fonction utilitaires, notamment pour la gestion des fichiers et des messages +""" + +""" +Verification de la typologie des repertoires dans LearningTest +- test dir: repertoire d'un test elementaire (ex: `IrisLight`) +- suite dir: repertoire d'une famille de test, contenant un sous-repertoire par test (ex: Standard) +- tool dir: repertoire pour un outil, contenant un sous-repertoire par suite de test: (ex: TestKhiops) +- home dir: repertoire LearningTest, contenant les tool dirs, designe en externe par 'LearningTest dir' + +Les methodes suivante verifie qu'un path, relatif ou absolu, se termine par un repertoire de la typologie. 
+En cas d'erreur, un message est affiche est on sort du programme +""" + + +def check_test_dir(checked_dir): + """Test si un chemin est celui d'un repertoire de test""" + checked_home_dir_path = parent_dir_path(checked_dir, 3) + checked_tool_dir_name = parent_dir_name(checked_dir, 2) + if ( + not check_home_dir(checked_home_dir_path) + or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() + ): + fatal_error(checked_dir + " should be a test directory of " + kht.LEARNING_TEST) + return True + + +def check_suite_dir(checked_dir): + """Test si un chemin est celui d'un repertoire de suite""" + checked_home_dir_path = parent_dir_path(checked_dir, 3) + checked_tool_dir_name = parent_dir_name(checked_dir, 1) + if ( + not check_home_dir(checked_home_dir_path) + or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() + ): + fatal_error( + checked_dir + " should be a suite directory of " + kht.LEARNING_TEST + ) + return True + + +def check_tool_dir(checked_dir): + """Test si un chemin est celui d'un repertoire d'outil""" + checked_home_dir_path = parent_dir_path(checked_dir, 3) + checked_tool_dir_name = parent_dir_name(checked_dir, 0) + if ( + not check_home_dir(checked_home_dir_path) + or checked_tool_dir_name not in kht.TOOL_DIR_NAMES.values() + ): + fatal_error(checked_dir + " should be a tool directory of " + kht.LEARNING_TEST) + return True + + +def check_home_dir(checked_dir, fatal_error_if_false=True): + """Test si un chemin est celui du repertoire LearningTest""" + checked_home_dir_path = parent_dir_path(checked_dir, 0) + # On n'impose pas que le repertoire racine ait le nom predefini kht.LEARNING_TEST + # On verifie juste que le repertoire contient au moins un des repertoires d'outil + for name in kht.TOOL_DIR_NAMES.values(): + checked_tool_dir_name = os.path.join(checked_home_dir_path, name) + if os.path.isdir(checked_tool_dir_name): + return True + # Echec si aucun repertoire d'outil trouve + if fatal_error_if_false: + fatal_error( + checked_dir + + " 
should be a valid '" + + kht.LEARNING_TEST + + "' home dir, containing at least one the tools directory " + + list_to_label(kht.TOOL_DIR_NAMES.values()) + ) + return False + + +def get_learning_test_sub_dir_depth(checked_dir): + """Test si un chemin est correspond a un sous-repertoire de LearningTest + Renvoie la profondeur a laquelle se trouver LearningTest + - 0: home dir + - 1: tool dir + - 2: suite dir + - 3: test dir + """ + if not os.path.isdir(checked_dir): + fatal_error(checked_dir + " should be a directory") + checked_home_dir_path = os.path.realpath(checked_dir) + depth = 0 + while depth < 4: + if check_home_dir(checked_home_dir_path, fatal_error_if_false=False): + return depth + checked_home_dir_path = os.path.dirname(checked_home_dir_path) + depth += 1 + fatal_error( + checked_dir + + " must be in a directory tree located a maximum of three levels above a valid '" + + kht.LEARNING_TEST + + "' home dir, containing at least one the tools directory " + + list_to_label(kht.TOOL_DIR_NAMES.values()) + ) + + +def get_home_dir(home_dir): + """Retourne le repertoire de base LearningTest a partir d'un sous-repertoire de profondeur quelconque""" + # On remonte dans le chemin (reel) jusqu'a trouver le repertoire racine + checked_home_dir_path = os.path.realpath(home_dir) + depth = 0 + while depth < 4: + if check_home_dir(checked_home_dir_path, fatal_error_if_false=False): + return checked_home_dir_path + checked_home_dir_path = os.path.dirname(checked_home_dir_path) + depth += 1 + assert False, ( + "No valid '" + kht.LEARNING_TEST + "' home dir found in path " + home_dir + ) + + +def test_dir_name(test_dir): + """Nom du repertoire de test a partir du chemin repertoire de test""" + return parent_dir_name(test_dir, 0) + + +def suite_dir_name(test_dir): + """Nom du repertoire de suite a partir du chemin repertoire de test""" + return parent_dir_name(test_dir, 1) + + +def tool_dir_name(test_dir): + """Nom du repertoire d'outil a partir du chemin repertoire de test""" + 
return parent_dir_name(test_dir, 2) + + +def dir_name(dir_path): + """Renvoie le nom reel du repertoire de base d'un chemin, meme si le chemin est relatif""" + return parent_dir_name(dir_path, 0) + + +def parent_dir_path(dir_path, depth): + """Renvoie le chemin d'un repertoire parent a une profondeur donnee + Le nom est le nom reel absolu, meme si le parametre en entree est un chemin relatif + Ainsi, utiliser depth=0 permet d'avoir le nom reel du repertoire de base dans tous les cas + + Example: pour un test path dir_path=/LearningTest/TestKhiops/Standard/Iris/. + - test dir: parent_dir_name(dir_path, 0) -> Iris + - suite dir: parent_dir_name(dir_path, 1) -> Standard + - tool dir: parent_dir_name(dir_path, 2) -> TestKhiops + - home dir: parent_dir_name(dir_path, 3) -> LearningTest + """ + if not os.path.isdir(dir_path): + fatal_error(dir_path + " should be a valid directory") + # Recherche du parent avec un chemin relatif + relative_parent_path = dir_path + for i in range(depth): + relative_parent_path += "/.." + # Nom reel du chemin + real_parent_path = os.path.realpath(relative_parent_path) + return real_parent_path + + +def parent_dir_name(dir_path, depth): + """Renvoie le nom d'un repertoire parent a une profondeur donnee + Le nom est le nom reel absolu, meme si le parametre en entree est un chemin relatif + Ainsi, utiliser depth=0 permet d'avoir le nom reel du repertoire de base dans tous les cas + + Example: pour un test path dir_path=/LearningTest/TestKhiops/Standard/Iris/. 
+ - test dir: parent_dir_name(dir_path, 0) -> Iris + - suite dir: parent_dir_name(dir_path, 1) -> Standard + - tool dir: parent_dir_name(dir_path, 2) -> TestKhiops + - home dir: parent_dir_name(dir_path, 3) -> LearningTest + """ + parent_path = parent_dir_path(dir_path, depth) + # On extrait le nom du repertoire + result_name = os.path.basename(parent_path) + return result_name + + +""" +Gestion des noms des binaires +""" + + +def is_valid_tool_full_exe_name(tool_exe_name): + """Indique si le nom du binaire fait partie des noms valides, avec prise + en compte des suffixes mpi pour les exe paralellisable""" + + return ( + tool_exe_name in kht.TOOL_EXE_NAMES.values() + or extract_tool_exe_name(tool_exe_name) in kht.PARALLEL_TOOL_NAMES + ) + + +def extract_tool_exe_name(tool_full_exe_name): + """Extrait le nom du binaire a partir d'un nom ayant potentiellement un suffixe mpi""" + + if tool_full_exe_name in kht.TOOL_EXE_NAMES.values(): + return tool_full_exe_name + for suffix in kht.TOOL_MPI_SUFFIXES: + if tool_full_exe_name.endswith(suffix): + # TODO: Replace with `return tool_full_exe_name.removesuffix(suffix)` + # as soon as Python 3.8 support is dropped + return tool_full_exe_name[: tool_full_exe_name.index(suffix)] + + +""" +Gestion des messages utilisateurs +""" + + +def fatal_error(message): + """Affichage du message lie a l'erreur puis quit""" + print("error : " + message) + exit(1) + + +def write_message(message, log_file=None, show=False): + """Ecriture d'un message dans un fichier de log + Ecriture dans un fichier de log selon le le parametre log_file + Affichage sur la console selon le parametre show + Si ni log_file, ni show ne sont specifier, la methode est en mode silencieux + """ + cleaned_message = message.encode(encoding="utf-8", errors="ignore").decode( + encoding="utf-8" + ) + if show: + print(cleaned_message) + # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique + if log_file is not None: + 
log_file.write(cleaned_message + "\n") + + +def append_message(initial_messages, message): + """Ajout d'un message a un message existant, en ajoutant si necessaire ', ' + pour separer les messages si les deux sont non vides + Retourne un message complete du nouveau message""" + if message == "": + return initial_messages + elif initial_messages == "": + return message + else: + return initial_messages + ", " + message + + +def list_to_label(value_list): + """Creation d'un libelle a partir de l'ensemble des valeurs d'une liste + sous la forme '(value1, value2..., valuek)'""" + label = "" + for value in value_list: + if label != "": + label += ", " + label += str(value) + label = "(" + label + ")" + return label + + +""" +Gestion des arguments commun de la ligne de commande, pour la librairie argparse +""" + + +def get_formatter_class(script_name): + """Variante de la classe de formatage a utiliser pour la creation des ArgumentParser + - pour montrer une option par ligne, en indentant legerement l'espace des noms des options + - pour permettre des aide multi-lignes, notamment dans l'epilogue + (je ne sais pas pourquoi il faut pas passer par un lambda expression: pas grave) + """ + argument_parser_formatter_class = lambda prog: argparse.RawTextHelpFormatter( + script_name, max_help_position=26 + ) + return argument_parser_formatter_class + + +def argument_parser_add_source_argument(parser): + """Ajout de l'argument source, sous repertoire dans l'arborescence LearningTest""" + parser.add_argument( + "source", + help="source directory, sub-dir in a " + kht.LEARNING_TEST + " tree", + ) + + +def argument_parser_add_dest_argument(parser): + """Ajout de l'argument dest, repertoire devant contenir une sous-partie d'une arborescence LearningTest""" + parser.add_argument( + "dest", + help="destination directory that contains the output " + + kht.LEARNING_TEST + + " tree", + ) + + +def argument_parser_add_family_argument(parser): + """Ajout de l'argument de famile de suites""" 
+ parser.add_argument( + "-f", + "--family", + help="family of test suites among " + + ", ".join(test_families.TEST_FAMILIES) + + " (default: " + + test_families.FULL + + ")", + choices=test_families.TEST_FAMILIES, + default=test_families.FULL, + metavar="name", + action="store", + ) + + +def argument_parser_add_processes_argument(parser): + """Ajout de l'argument du nombre de process""" + parser.add_argument( + "-p", + "--processes", + help="number of processes (default: 1)", + dest="n", + type=int, + default=1, + metavar="n", + action="store", + ) + + +def argument_parser_add_forced_platform_argument(parser): + """Ajout de l'argument de plateforme forcee en remplacement de la plateforme courante""" + parser.add_argument( + "--forced-platform", + help="platform " + + list_to_label(kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM]) + + " used to compare results (default: current platform)", + choices=kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM], + metavar="plt", + action="store", + ) + + +def argument_parser_add_limit_test_time_arguments(parser): + """Ajout des argument de limite des temps de test""" + parser.add_argument( + "--min-test-time", + help="only for test dirs where reference test time (in file " + + kht.TIME_LOG + + ") is beyond a threshold", + type=float, + metavar="t", + action="store", + ) + parser.add_argument( + "--max-test-time", + help="only for test dirs where reference test time (in file " + + kht.TIME_LOG + + ") is below a threshold", + type=float, + metavar="t", + action="store", + ) + + +def argument_parser_check_source_argument(parser, source): + """Verification de l'argument source, a appeler apres les verification standard de parse_args() + On renvoie la decomposition du repertoire source sous la forme des champs: + home_dir, tool_dir_name, suite_dir_name, test_dir_name + """ + # Verification du repertoire a tester + learning_test_depth = get_learning_test_sub_dir_depth(source) + source_tool_dir_name = None + source_suite_dir_name = None + 
source_test_dir_name = None + if learning_test_depth == 3: + source_tool_dir_name = parent_dir_name(source, 2) + source_suite_dir_name = parent_dir_name(source, 1) + source_test_dir_name = parent_dir_name(source, 0) + elif learning_test_depth == 2: + source_tool_dir_name = parent_dir_name(source, 1) + source_suite_dir_name = parent_dir_name(source, 0) + elif learning_test_depth == 1: + source_tool_dir_name = parent_dir_name(source, 0) + if ( + source_tool_dir_name is not None + and source_tool_dir_name not in kht.TOOL_DIR_NAMES.values() + ): + parser.error( + "argument source: " + + source_tool_dir_name + + " in " + + os.path.realpath(source) + + " should be a tool dir " + + list_to_label(kht.TOOL_DIR_NAMES.values()) + ) + source_home_dir = get_home_dir(source) + return ( + source_home_dir, + source_tool_dir_name, + source_suite_dir_name, + source_test_dir_name, + ) + + +def argument_parser_check_destination_dir(parser, source_home_dir, destination_dir): + """Test si un chemin peut servir a aboutir a un repertoire racine + Il ne doit pas etre un sous-repertoire du repertoire de base""" + check_home_dir(source_home_dir) + home_dir = os.path.realpath(source_home_dir) + target_dir = os.path.realpath(destination_dir) + if (target_dir + os.path.sep).find(home_dir + os.path.sep) >= 0: + parser.error( + "argument dest: " + "destination dir " + + destination_dir + + " must not in the directory tree " + + home_dir + ) + + +def argument_parser_check_processes_argument(parser, processes): + """Verification de l'argument processes, a appeler apres les verification standard de parse_args()""" + max_process_number = 128 + if processes < 1: + parser.error("argument -p/--processes: min value is 1") + elif processes > max_process_number: + parser.error("argument -p/--processes: max value is " + str(max_process_number)) + + +def argument_parser_check_limit_test_time_arguments( + parser, min_test_time, max_test_time +): + """Verification des arguments de limites des temps de test, + 
a appeler apres les verification standard de parse_args()""" + if min_test_time is not None and min_test_time < 0: + parser.error("argument --min-test-time must be positive") + if max_test_time is not None and max_test_time < 0: + parser.error("argument --max-test-time must be positive") + + +""" +Gestion du contenu d'un fichier +""" + + +def read_file_lines(file_path, log_file=None, show=False): + """Chargement en memoire des lignes d'un fichier + Retourne la liste des fichiers si ok, None sinon + Ecrit un message dans le log en cas d'erreur + """ + # lecture des lignes du fichier + try: + with open(file_path, "r", errors="ignore") as file: + file_lines = file.readlines() + except BaseException as exception: + write_message( + "Error : can't open file " + file_path + " (" + str(exception) + ")", + log_file=log_file, + show=show, + ) + file_lines = None + return file_lines + + +def write_file_lines( + file_path, file_lines, striped_lines_suffix="\n", log_file=None, show=False +): + """Ecriture d'une liste de ligne dans un fichier + Ajoute un suffix aux lignes sans caractere fin de ligne + Ecrit un message dans le log en cas d'erreur + """ + # lecture des lignes du fichier + try: + with open(file_path, "w", errors="ignore") as file: + for line in file_lines: + file.write(line) + if len(line) == 0 or line[-1] != "\n": + file.write(striped_lines_suffix) + except BaseException as exception: + write_message( + "Error : can't open output file " + file_path + " (" + str(exception) + ")", + log_file=log_file, + show=show, + ) + + +""" +Gestion de patterns dans un fichier +Un patterns est une liste de sous-chaines devant se trouver en sequance dans une ligne +Exemples: +- la ligne "warning : unable to open file" contient le pattern elementaire ["warning"] + et le pattern complexe ["warning", "file"] +""" + + +def find_pattern_in_line(line, pattern): + """Renvoie la position de la premiere sous-chaine d'un pattern si une ligne contient un pattern + Retourne -1 sinon""" + 
assert isinstance(pattern, list) + pos = 0 + first_pos = None + for sub_pattern in pattern: + pos = line[pos:].find(sub_pattern) + if first_pos is None: + first_pos = pos + if pos == -1: + return -1 + if first_pos is None: + return -1 + else: + return first_pos + + +def find_pattern_in_lines(lines, pattern): + """Recherche d'un pattern un ensemble de lignes + Renvoie l'index de la premiere ligne contenant le pattern, -1 sinon""" + assert isinstance(lines, list) + assert isinstance(pattern, list) + for i, line in enumerate(lines): + if find_pattern_in_line(line, pattern) != -1: + return i + return -1 + + +def filter_lines_with_pattern(lines, pattern): + """Retourne les lignes sans celles contenant le pattern en parametre""" + assert isinstance(lines, list) + assert isinstance(pattern, list) + output_lines = [] + for line in lines: + if find_pattern_in_line(line, pattern) == -1: + output_lines.append(line) + return output_lines + + +def filter_copyright_lines(lines): + """Retourne les lignes sans les lignes de copyright, presentes en mode UI""" + assert isinstance(lines, list) + output_lines = lines + is_copyright = False + if len(lines) >= 2: + is_copyright = ( + find_pattern_in_line(lines[1], ["(c)", "Orange - All rights reserved."]) + != -1 + ) + if is_copyright: + output_lines = lines[2:] + return output_lines + + +def filter_process_id_prefix_from_lines(lines): + """Retourne les lignes sans l'eventuel prefixe de process id, du type '[0] ' + qui est emis par mpiexce dans les sorties standard""" + output_lines = [] + for line in lines: + # En parallelle, une ligne vide peut contenir le numero du process entre crochets + pos_end = -1 + is_process_id = len(line) > 0 and line[0] == "[" + if is_process_id: + pos_end = line.find("]") + is_process_id = pos_end > 0 and line[1:pos_end].isdigit() + if is_process_id: + line = line[pos_end + 1 :].lstrip() + output_lines.append(line) + return output_lines + + +def filter_empty_lines(lines): + """Retourne les lignes sans les 
lignes vides""" + output_lines = [] + for line in lines: + if line.strip() != "": + output_lines.append(line) + return output_lines + + +""" +Gestion des fichiers et repertoires +""" + + +def copy_file(src_file_path, dest_file_path): + """Copie d'un fichier, avec message d'erreur""" + try: + shutil.copy(src_file_path, dest_file_path) + except BaseException as message: + print("can't copy " + src_file_path + " (" + str(message) + ")") + + +def remove_file(file_path): + """Suppression d'un fichier, avec message d'erreur""" + try: + os.chmod(file_path, stat.S_IWRITE) + os.remove(file_path) + except (IOError, os.error) as why: + print("Cannot remove file %s: %s" % (file_path, str(why))) + + +def make_dir(dest_dir): + """Creation d'un repertoire, avec message d'erreur""" + try: + os.mkdir(dest_dir) + except (IOError, os.error) as why: + print("Cannot create directory %s: %s" % (dest_dir, str(why))) + + +def remove_dir(dir_to_remove): + """Suppression d'un repertoire cense etre vide, avec message d'erreur""" + try: + os.rmdir(dir_to_remove) + except (IOError, os.error) as why: + print("Cannot remove directory %s: %s" % (dir_to_remove, str(why))) + + +def sub_dirs(source_dir): + """Renvoie la liste des sous-repertoire d'un repertoire, sans message d'erreur""" + result_sub_dirs = [] + if os.path.isdir(source_dir): + try: + list_dir = os.listdir(source_dir) + for name in list_dir: + if os.path.isdir(os.path.join(source_dir, name)): + result_sub_dirs.append(name) + except (IOError, os.error): + pass + return result_sub_dirs + + +def set_flushed_outputs(): + """Flush systematique des sorties standard et d'erreur""" + sys.stdout = Unbuffered(sys.stdout) + sys.stderr = Unbuffered(sys.stderr) + + +class Unbuffered: + """Pour ouvrir un fichier avec un flush systematique + usage: par exemple, appeler sys.stdout = Unbuffered(sys.stdout) pour que toutes les sorties standard + soit immediatement affichees dans le shell, sans bufferisation + """ + + def __init__(self, stream): + 
self.stream = stream + + def write(self, data): + # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique + self.stream.write(data.encode("utf-8", "ignore").decode("utf-8")) + self.stream.flush() + + def writelines(self, datas): + # on encode en utf-8 en ignorant les erreurs pour eviter un erreur lors de l'encodage automatique + self.stream.writelines( + [data.encode("utf-8", "ignore").decode("utf-8") for data in datas] + ) + self.stream.flush() + + def __getattr__(self, attr): + return getattr(self.stream, attr) diff --git a/test/LearningTestTool/py/kht_apply.py b/test/LearningTestTool/py/kht_apply.py index 701a46350..e55f9fe9a 100644 --- a/test/LearningTestTool/py/kht_apply.py +++ b/test/LearningTestTool/py/kht_apply.py @@ -1,283 +1,287 @@ -import os.path -import sys -import argparse - -import _kht_constants as kht -import _kht_utils as utils -import _kht_families as test_families -import _kht_results_management as results -import _kht_standard_instructions as standard_instructions -import _kht_one_shot_instructions as one_shot_instructions - -""" -Gestion de l'ensemble des instructions -""" - - -def apply_instruction_on_suite_dir( - instruction_function, - suite_dir, - input_test_dir_name, - min_test_time=None, - max_test_time=None, -): - """Application d'une instruction sur une suite ou sur un repertoire de test specifique""" - assert suite_dir != "" - - # Erreur si repertoire de suite absent - if not os.path.isdir(suite_dir): - utils.fatal_error("missing directory for test suite " + suite_dir) - - # Collecte des sous-repertoires de test - test_list = [] - # Cas avec un repertoire de test specifique - if input_test_dir_name is not None: - if os.path.isdir(os.path.join(suite_dir, input_test_dir_name)): - test_list.append(input_test_dir_name) - # Cas avec une suite de test - else: - for name in os.listdir(suite_dir): - if os.path.isdir(os.path.join(suite_dir, name)): - test_list.append(name) - - # Tri pour assurer la 
reproductibilite inter plateforme - test_list.sort() - - # Execution de l'instruction - for test_dir_name in test_list: - # lanceur d'instruction sur un directory - test_dir = os.path.realpath(os.path.join(suite_dir, test_dir_name)) - # verification de l'existence du directory - if not os.path.isdir(test_dir): - utils.fatal_error("directory " + test_dir + " does not exist") - # On ne prend en compte que les tests compatibles avedc les contraintes de temps - if results.is_results_ref_dir_time_selected( - test_dir, min_test_time, max_test_time - ): - # Application de l'instruction - current_dir = os.getcwd() - os.chdir(test_dir) - instruction_function(test_dir) - os.chdir(current_dir) - # Message synthetique de fin - suite_dir_name = utils.dir_name(suite_dir) - tool_dir_name = utils.parent_dir_name(suite_dir, 1) - if input_test_dir_name is None: - print("DONE\t" + tool_dir_name + "\t" + suite_dir_name) - else: - print( - "done\t" - + tool_dir_name - + "\t" - + suite_dir_name - + "\t" - + input_test_dir_name - ) - - -def apply_instruction_on_learning_test_tree( - home_dir, - input_tool_dir_name, - input_suite_dir_name, - input_test_dir_name, - instruction_function, - family, - **kwargs -): - """Applique une instruction un ensemble de suites de tests - Toute ou partie de l'arborescence est prise en compte selon la specification - des operandes tool_dir_name, suite_dir_name, test_dir_name, qui peuvent etre None sinon. 
- - home_dir: repertoire principal de l'aborescence source - - tool_dir_name, suite_dir_name, test_dir_name: pour ne prendre en compte qu'une sous-partie - de l'arborescence source si ces oprande ne sont pas None - - instruction_function: instruction a appliquee - - family: famille utilise pour choisir la sous-partie des suites a exporter - - kwargs: argument optionnels de la ligne de commande - """ - - # Tous les outils sont a prendre en compte si on est a la racine - if input_tool_dir_name is None: - used_tool_names = kht.TOOL_NAMES - # Sinon, seul l'outil correspondant au tool dir est a tester - else: - tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] - used_tool_names = [tool_name] - - # Cas d'un seul outil avec un repertoire de suite au de test specifique - # Dans ce cas, on ignore la famille - if input_suite_dir_name is not None: - suite_dir = os.path.join(home_dir, input_tool_dir_name, input_suite_dir_name) - apply_instruction_on_suite_dir( - instruction_function, suite_dir, input_test_dir_name, **kwargs - ) - # Cas d'un ou plusieurs outils, ou il faut utiliser les suites de la famille specifiee - else: - assert len(used_tool_names) >= 1 - for tool_name in used_tool_names: - tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] - if family == test_families.ALL: - test_suites = utils.sub_dirs(os.path.join(home_dir, tool_dir_name)) - else: - test_suites = test_families.FAMILY_TEST_SUITES[family, tool_name] - # Parcours de toutes les suites - for name in test_suites: - suite_dir = os.path.join(home_dir, tool_dir_name, name) - if os.path.isdir(suite_dir): - apply_instruction_on_suite_dir( - instruction_function, suite_dir, None, **kwargs - ) - else: - print("error : suite directory not found: " + suite_dir) - - -def register_all_instructions(): - """Enregistrement de toutes les instructions standards et a usage unique - Retourne un dictionnaire des instructions, et le nombre d'instructions standards - """ - - instructions1 = 
standard_instructions.register_standard_instructions() - instructions2 = one_shot_instructions.register_one_shot_instructions() - # L'operateur d'union entre dictionnaires '|' n'est supporte que depuis python 3.9 - all_instructions = instructions1 - all_instructions.update(instructions2) - return all_instructions, len(instructions1) - - -def main(): - """Fonction principale d'application systematique d'une instruction sur une suite de test""" - - def build_usage_help( - help_command, - help_instruction, - help_tool_dir_name=None, - help_suite_dir_name=None, - help_test_dir_name=None, - help_options=None, - ): - """Construction d'une ligne d'aide pour un usage de la commande test""" - source_dir = os.path.join(".", kht.LEARNING_TEST) - if help_test_dir_name is not None: - source_dir = os.path.join( - source_dir, help_tool_dir_name, help_suite_dir_name, help_test_dir_name - ) - elif help_suite_dir_name is not None: - source_dir = os.path.join( - source_dir, help_tool_dir_name, help_suite_dir_name - ) - elif help_tool_dir_name is not None: - source_dir = os.path.join(source_dir, help_tool_dir_name) - usage_help = help_command + " " + source_dir + " " + help_instruction - if help_options is not None: - usage_help += " " + help_options - return usage_help - - # Enregistrement de toutes les instructions - ( - all_instructions, - standard_instruction_number, - ) = register_all_instructions() - - # Nom du script - script_file_name = os.path.basename(__file__) - script_name = os.path.splitext(script_file_name)[0] - - # Ajout d'exemples d'utilisation - epilog = "" - epilog += "Usage examples" - epilog += "\n " + build_usage_help(script_name, "errors") - epilog += "\n " + build_usage_help( - script_name, - "logs", - kht.TOOL_DIR_NAMES[kht.KHIOPS], - "Standard", - "Iris", - ) - epilog += "\n " + build_usage_help( - script_name, - "errors", - kht.TOOL_DIR_NAMES[kht.COCLUSTERING], - help_options="-f basic", - ) - - # Affichage de la liste des instructions disponibles, en la 
formattant au mieux - instructions_help = "" - max_id_len = 0 - for instruction_id in all_instructions: - max_id_len = max(max_id_len, len(instruction_id)) - for index, instruction_id in enumerate(all_instructions): - (instruction_function, instruction_label) = all_instructions[instruction_id] - if index == standard_instruction_number: - instructions_help += "\none-shot instructions" - instructions_help += ( - "\n " + instruction_id.ljust(max_id_len + 1) + instruction_label - ) - - # Parametrage de l'analyse de la ligne de commande - parser = argparse.ArgumentParser( - prog=script_name, - description="apply instruction (ex: errors) on a subset of test dirs", - epilog=epilog, - formatter_class=utils.get_formatter_class(script_name), - ) - - # Arguments positionnels - utils.argument_parser_add_source_argument(parser) - parser.add_argument( - "instruction", - help="instruction to apply" + instructions_help, - ) - - # Arguments optionnels standards - utils.argument_parser_add_family_argument(parser) - utils.argument_parser_add_processes_argument(parser) - utils.argument_parser_add_forced_platform_argument(parser) - utils.argument_parser_add_limit_test_time_arguments(parser) - - # Analyse de la ligne de commande - args = parser.parse_args() - - # Verification de l'argument source - ( - home_dir, - tool_dir_name, - suite_dir_name, - test_dir_name, - ) = utils.argument_parser_check_source_argument(parser, args.source) - - # Verification de l'argument instruction - # On n'utilise pas le parametre 'choices' de add_argument pour eviter - # d'avoir des messages d'erreur tres long comportant toutes les valeurs possibles - if args.instruction not in all_instructions: - parser.error( - "argument instruction: unknown instruction '" + args.instruction + "'" - ) - - # Verification des arguments optionnels - utils.argument_parser_check_processes_argument(parser, args.n) - utils.argument_parser_check_limit_test_time_arguments( - parser, args.min_test_time, args.max_test_time - ) - - 
# Memorisation des variables globales de gestion du contexte des resultats de reference - results.process_number = args.n - results.forced_platform = args.forced_platform - - # Acces a l'instruction a executer - (instruction_function, instruction_label) = all_instructions[args.instruction] - - # Lancement de la commande - apply_instruction_on_learning_test_tree( - home_dir, - tool_dir_name, - suite_dir_name, - test_dir_name, - instruction_function, - args.family, - min_test_time=args.min_test_time, - max_test_time=args.max_test_time, - ) - - -if __name__ == "__main__": - utils.set_flushed_outputs() - main() +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +import os.path +import sys +import argparse + +import _kht_constants as kht +import _kht_utils as utils +import _kht_families as test_families +import _kht_results_management as results +import _kht_standard_instructions as standard_instructions +import _kht_one_shot_instructions as one_shot_instructions + +""" +Gestion de l'ensemble des instructions +""" + + +def apply_instruction_on_suite_dir( + instruction_function, + suite_dir, + input_test_dir_name, + min_test_time=None, + max_test_time=None, +): + """Application d'une instruction sur une suite ou sur un repertoire de test specifique""" + assert suite_dir != "" + + # Erreur si repertoire de suite absent + if not os.path.isdir(suite_dir): + utils.fatal_error("missing directory for test suite " + suite_dir) + + # Collecte des sous-repertoires de test + test_list = [] + # Cas avec un repertoire de test specifique + if input_test_dir_name is not None: + if os.path.isdir(os.path.join(suite_dir, input_test_dir_name)): + test_list.append(input_test_dir_name) + # Cas avec une suite de test + else: + for name in os.listdir(suite_dir): + if 
os.path.isdir(os.path.join(suite_dir, name)): + test_list.append(name) + + # Tri pour assurer la reproductibilite inter plateforme + test_list.sort() + + # Execution de l'instruction + for test_dir_name in test_list: + # lanceur d'instruction sur un directory + test_dir = os.path.realpath(os.path.join(suite_dir, test_dir_name)) + # verification de l'existence du directory + if not os.path.isdir(test_dir): + utils.fatal_error("directory " + test_dir + " does not exist") + # On ne prend en compte que les tests compatibles avedc les contraintes de temps + if results.is_results_ref_dir_time_selected( + test_dir, min_test_time, max_test_time + ): + # Application de l'instruction + current_dir = os.getcwd() + os.chdir(test_dir) + instruction_function(test_dir) + os.chdir(current_dir) + # Message synthetique de fin + suite_dir_name = utils.dir_name(suite_dir) + tool_dir_name = utils.parent_dir_name(suite_dir, 1) + if input_test_dir_name is None: + print("DONE\t" + tool_dir_name + "\t" + suite_dir_name) + else: + print( + "done\t" + + tool_dir_name + + "\t" + + suite_dir_name + + "\t" + + input_test_dir_name + ) + + +def apply_instruction_on_learning_test_tree( + home_dir, + input_tool_dir_name, + input_suite_dir_name, + input_test_dir_name, + instruction_function, + family, + **kwargs +): + """Applique une instruction un ensemble de suites de tests + Toute ou partie de l'arborescence est prise en compte selon la specification + des operandes tool_dir_name, suite_dir_name, test_dir_name, qui peuvent etre None sinon. 
+ - home_dir: repertoire principal de l'aborescence source + - tool_dir_name, suite_dir_name, test_dir_name: pour ne prendre en compte qu'une sous-partie + de l'arborescence source si ces oprande ne sont pas None + - instruction_function: instruction a appliquee + - family: famille utilise pour choisir la sous-partie des suites a exporter + - kwargs: argument optionnels de la ligne de commande + """ + + # Tous les outils sont a prendre en compte si on est a la racine + if input_tool_dir_name is None: + used_tool_names = kht.TOOL_NAMES + # Sinon, seul l'outil correspondant au tool dir est a tester + else: + tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] + used_tool_names = [tool_name] + + # Cas d'un seul outil avec un repertoire de suite au de test specifique + # Dans ce cas, on ignore la famille + if input_suite_dir_name is not None: + suite_dir = os.path.join(home_dir, input_tool_dir_name, input_suite_dir_name) + apply_instruction_on_suite_dir( + instruction_function, suite_dir, input_test_dir_name, **kwargs + ) + # Cas d'un ou plusieurs outils, ou il faut utiliser les suites de la famille specifiee + else: + assert len(used_tool_names) >= 1 + for tool_name in used_tool_names: + tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] + if family == test_families.ALL: + test_suites = utils.sub_dirs(os.path.join(home_dir, tool_dir_name)) + else: + test_suites = test_families.FAMILY_TEST_SUITES[family, tool_name] + # Parcours de toutes les suites + for name in test_suites: + suite_dir = os.path.join(home_dir, tool_dir_name, name) + if os.path.isdir(suite_dir): + apply_instruction_on_suite_dir( + instruction_function, suite_dir, None, **kwargs + ) + else: + print("error : suite directory not found: " + suite_dir) + + +def register_all_instructions(): + """Enregistrement de toutes les instructions standards et a usage unique + Retourne un dictionnaire des instructions, et le nombre d'instructions standards + """ + + instructions1 = 
standard_instructions.register_standard_instructions() + instructions2 = one_shot_instructions.register_one_shot_instructions() + # L'operateur d'union entre dictionnaires '|' n'est supporte que depuis python 3.9 + all_instructions = instructions1 + all_instructions.update(instructions2) + return all_instructions, len(instructions1) + + +def main(): + """Fonction principale d'application systematique d'une instruction sur une suite de test""" + + def build_usage_help( + help_command, + help_instruction, + help_tool_dir_name=None, + help_suite_dir_name=None, + help_test_dir_name=None, + help_options=None, + ): + """Construction d'une ligne d'aide pour un usage de la commande test""" + source_dir = os.path.join(".", kht.LEARNING_TEST) + if help_test_dir_name is not None: + source_dir = os.path.join( + source_dir, help_tool_dir_name, help_suite_dir_name, help_test_dir_name + ) + elif help_suite_dir_name is not None: + source_dir = os.path.join( + source_dir, help_tool_dir_name, help_suite_dir_name + ) + elif help_tool_dir_name is not None: + source_dir = os.path.join(source_dir, help_tool_dir_name) + usage_help = help_command + " " + source_dir + " " + help_instruction + if help_options is not None: + usage_help += " " + help_options + return usage_help + + # Enregistrement de toutes les instructions + ( + all_instructions, + standard_instruction_number, + ) = register_all_instructions() + + # Nom du script + script_file_name = os.path.basename(__file__) + script_name = os.path.splitext(script_file_name)[0] + + # Ajout d'exemples d'utilisation + epilog = "" + epilog += "Usage examples" + epilog += "\n " + build_usage_help(script_name, "errors") + epilog += "\n " + build_usage_help( + script_name, + "logs", + kht.TOOL_DIR_NAMES[kht.KHIOPS], + "Standard", + "Iris", + ) + epilog += "\n " + build_usage_help( + script_name, + "errors", + kht.TOOL_DIR_NAMES[kht.COCLUSTERING], + help_options="-f basic", + ) + + # Affichage de la liste des instructions disponibles, en la 
formattant au mieux + instructions_help = "" + max_id_len = 0 + for instruction_id in all_instructions: + max_id_len = max(max_id_len, len(instruction_id)) + for index, instruction_id in enumerate(all_instructions): + (instruction_function, instruction_label) = all_instructions[instruction_id] + if index == standard_instruction_number: + instructions_help += "\none-shot instructions" + instructions_help += ( + "\n " + instruction_id.ljust(max_id_len + 1) + instruction_label + ) + + # Parametrage de l'analyse de la ligne de commande + parser = argparse.ArgumentParser( + prog=script_name, + description="apply instruction (ex: errors) on a subset of test dirs", + epilog=epilog, + formatter_class=utils.get_formatter_class(script_name), + ) + + # Arguments positionnels + utils.argument_parser_add_source_argument(parser) + parser.add_argument( + "instruction", + help="instruction to apply" + instructions_help, + ) + + # Arguments optionnels standards + utils.argument_parser_add_family_argument(parser) + utils.argument_parser_add_processes_argument(parser) + utils.argument_parser_add_forced_platform_argument(parser) + utils.argument_parser_add_limit_test_time_arguments(parser) + + # Analyse de la ligne de commande + args = parser.parse_args() + + # Verification de l'argument source + ( + home_dir, + tool_dir_name, + suite_dir_name, + test_dir_name, + ) = utils.argument_parser_check_source_argument(parser, args.source) + + # Verification de l'argument instruction + # On n'utilise pas le parametre 'choices' de add_argument pour eviter + # d'avoir des messages d'erreur tres long comportant toutes les valeurs possibles + if args.instruction not in all_instructions: + parser.error( + "argument instruction: unknown instruction '" + args.instruction + "'" + ) + + # Verification des arguments optionnels + utils.argument_parser_check_processes_argument(parser, args.n) + utils.argument_parser_check_limit_test_time_arguments( + parser, args.min_test_time, args.max_test_time + ) + + 
# Memorisation des variables globales de gestion du contexte des resultats de reference + results.process_number = args.n + results.forced_platform = args.forced_platform + + # Acces a l'instruction a executer + (instruction_function, instruction_label) = all_instructions[args.instruction] + + # Lancement de la commande + apply_instruction_on_learning_test_tree( + home_dir, + tool_dir_name, + suite_dir_name, + test_dir_name, + instruction_function, + args.family, + min_test_time=args.min_test_time, + max_test_time=args.max_test_time, + ) + + +if __name__ == "__main__": + utils.set_flushed_outputs() + main() diff --git a/test/LearningTestTool/py/kht_collect_results.py b/test/LearningTestTool/py/kht_collect_results.py index cbfdddaec..70f9bd5b7 100644 --- a/test/LearningTestTool/py/kht_collect_results.py +++ b/test/LearningTestTool/py/kht_collect_results.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. 
+ import os import sys import os.path diff --git a/test/LearningTestTool/py/kht_env.py b/test/LearningTestTool/py/kht_env.py index 601955ce2..1e474cf34 100644 --- a/test/LearningTestTool/py/kht_env.py +++ b/test/LearningTestTool/py/kht_env.py @@ -1,70 +1,74 @@ -import os -import argparse - -import _kht_constants as kht -import _kht_utils as utils - - -def print_env_var_help(env_var, help_text): - """Affichage de l'aide sur une variable d'environnement""" - print(env_var + ": " + str(os.getenv(env_var)) + "\n\t" + help_text) - - -def get_env_var_help_label(env_var, help_text): - """Renvoie un libelle d'aide sur une variable d'environnement""" - return env_var + ": " + str(os.getenv(env_var)) + ", " + help_text - - -def help_env_vars(): - # Aide sur les variables d'environnement influant le comportement des outils Khiops - print("") - print_env_var_help( - kht.KHIOPS_PREPARATION_TRACE_MODE, - "trace for dimensionnining of preparation tasks (default: false)", - ) - print_env_var_help(kht.KHIOPS_PARALLEL_TRACE, "trace for parallel tasks (0 to 3)") - print_env_var_help( - kht.KHIOPS_FILE_SERVER_ACTIVATED, "activate MPI file server (false,true)" - ) - - # Aide particulier sur le pilotage des traces memoire - print( - "Analysis of memory stats" - + "\n\t" - + get_env_var_help_label( - kht.KHIOPS_MEM_STATS_LOG_FILE_NAME, "memory stats log file name" - ) - + "\n\t" - + get_env_var_help_label( - kht.KHIOPS_MEM_STATS_LOG_FREQUENCY, - "frequency of allocator stats collection (0, 100000, 1000000,...)", - ) - + "\n\t" - + get_env_var_help_label( - kht.KHIOPS_MEM_STATS_LOG_TO_COLLECT, - "stats to collect (8193: only time and labels, 16383: all,...)", - ) - + "\n\t" - + get_env_var_help_label( - kht.KHIOPS_IO_TRACE_MODE, "to collect IO trace (false, true)" - ) - ) - - -def main(): - """Fonction principale d'affichage de l'aide sur les variables d'environnement""" - # Parametrage de l'analyse de la ligne de commande - script_name = os.path.basename(__file__) - base_script_name = 
os.path.splitext(script_name)[0] - parser = argparse.ArgumentParser( - prog=base_script_name, - description="show the status of the main environment variables used by the tool binaries", - ) - # Analyse de la ligne de commande et execution - parser.parse_args() - help_env_vars() - - -if __name__ == "__main__": - utils.set_flushed_outputs() - main() +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + +import os +import argparse + +import _kht_constants as kht +import _kht_utils as utils + + +def print_env_var_help(env_var, help_text): + """Affichage de l'aide sur une variable d'environnement""" + print(env_var + ": " + str(os.getenv(env_var)) + "\n\t" + help_text) + + +def get_env_var_help_label(env_var, help_text): + """Renvoie un libelle d'aide sur une variable d'environnement""" + return env_var + ": " + str(os.getenv(env_var)) + ", " + help_text + + +def help_env_vars(): + # Aide sur les variables d'environnement influant le comportement des outils Khiops + print("") + print_env_var_help( + kht.KHIOPS_PREPARATION_TRACE_MODE, + "trace for dimensionnining of preparation tasks (default: false)", + ) + print_env_var_help(kht.KHIOPS_PARALLEL_TRACE, "trace for parallel tasks (0 to 3)") + print_env_var_help( + kht.KHIOPS_FILE_SERVER_ACTIVATED, "activate MPI file server (false,true)" + ) + + # Aide particulier sur le pilotage des traces memoire + print( + "Analysis of memory stats" + + "\n\t" + + get_env_var_help_label( + kht.KHIOPS_MEM_STATS_LOG_FILE_NAME, "memory stats log file name" + ) + + "\n\t" + + get_env_var_help_label( + kht.KHIOPS_MEM_STATS_LOG_FREQUENCY, + "frequency of allocator stats collection (0, 100000, 1000000,...)", + ) + + "\n\t" + + get_env_var_help_label( + kht.KHIOPS_MEM_STATS_LOG_TO_COLLECT, + "stats to collect (8193: only time and labels, 16383: 
all,...)", + ) + + "\n\t" + + get_env_var_help_label( + kht.KHIOPS_IO_TRACE_MODE, "to collect IO trace (false, true)" + ) + ) + + +def main(): + """Fonction principale d'affichage de l'aide sur les variables d'environnement""" + # Parametrage de l'analyse de la ligne de commande + script_name = os.path.basename(__file__) + base_script_name = os.path.splitext(script_name)[0] + parser = argparse.ArgumentParser( + prog=base_script_name, + description="show the status of the main environment variables used by the tool binaries", + ) + # Analyse de la ligne de commande et execution + parser.parse_args() + help_env_vars() + + +if __name__ == "__main__": + utils.set_flushed_outputs() + main() diff --git a/test/LearningTestTool/py/kht_export.py b/test/LearningTestTool/py/kht_export.py index 6b272e58b..0c567bfe5 100644 --- a/test/LearningTestTool/py/kht_export.py +++ b/test/LearningTestTool/py/kht_export.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. + import os import os.path import argparse diff --git a/test/LearningTestTool/py/kht_help.py b/test/LearningTestTool/py/kht_help.py index 0e8cd0c4b..7c40cb394 100644 --- a/test/LearningTestTool/py/kht_help.py +++ b/test/LearningTestTool/py/kht_help.py @@ -1,3 +1,7 @@ +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. 
+ import os import argparse diff --git a/test/LearningTestTool/py/kht_test.py b/test/LearningTestTool/py/kht_test.py index c1b7fa775..966e2fb3d 100644 --- a/test/LearningTestTool/py/kht_test.py +++ b/test/LearningTestTool/py/kht_test.py @@ -1,977 +1,984 @@ -import os.path -import sys -import platform -import shutil -import subprocess -import time -import argparse - -import _kht_constants as kht -import _kht_utils as utils -import _kht_families as test_families -import _kht_results_management as results -import _kht_check_results as check - -# mpiexec sous Windows -if os.name == "nt": - mpi_exe_name = "mpiexec.exe" -# mpiexec sous Linux -else: - mpi_exe_name = "mpirun" - - -def build_tool_exe_path(tool_binaries_dir, tool_name): - """Construction du chemin de l'executable d'un outil a partir du repertoire des binaire - Le premier parametre peut contenir plusieurs types de valeurs - - un repertoire devant contenir les binaire de l'outil a tester - - 'r' ou 'd', alias pour le repertoire des binaires en release ou debug de l'envbironnement de developpement - - 'check': pour effectuer seulment une comparaison entre resultats de test et de reference - On renvoie: - - le path complet d'un binaire d'un outil si un repertoire est specifie, 'check' sinon, None si erreur - - le message d'erreur en cas d'error - """ - assert tool_name in kht.TOOL_NAMES - tool_exe_path = None - error_message = "" - # Cas particulier de la comparaison seulement - if tool_binaries_dir == "check": - return "check", error_message - - # Recherche du repertoire des binaires de l'environnement de developpement - alias_info = "" - current_platform = results.get_context_platform_type() - assert current_platform in kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM] - # Cas d'un alias pour rechercher le repertoire des binaires dans l'environnement de developpement - actual_tool_binaries_dir = "" - if tool_binaries_dir in [kht.ALIAS_D, kht.ALIAS_R]: - script_path = __file__ - # Repertoire ou sont construit les 
produits de compilation - build_dir = os.path.realpath( - os.path.join( - script_path, - "..", - "..", - "..", - "..", - "build", - ) - ) - # Suffixe du nom du repertoire contenant les binaires - searched_suffix = "" - if tool_binaries_dir == kht.ALIAS_D: - searched_suffix = "-debug" - elif tool_binaries_dir == kht.ALIAS_R: - searched_suffix = "-release" - # Recherche des sous repertoire contenant le bon suffixe, plus un sous-repertoire bin - candidate_binaries_dirs = [] - if os.path.isdir(build_dir): - for name in os.listdir(build_dir): - if ( - searched_suffix in name - and name[len(name) - len(searched_suffix) :] == searched_suffix - ): - binaries_dir = os.path.join(build_dir, name, "bin") - if os.path.isdir(binaries_dir): - candidate_binaries_dirs.append(binaries_dir) - # Erreur si repertoire des binaires non trouve - if len(candidate_binaries_dirs) == 0: - error_message = ( - "Tool binaries dir for alias '" - + tool_binaries_dir - + "' not found in current khiops repo under the bin dir " - + build_dir - ) - # Erreur si plusieurs repertoires des binaires non trouves - elif len(candidate_binaries_dirs) > 1: - error_message = ( - "Multiple tool binaries dir found for alias '" - + tool_binaries_dir - + "' in current khiops repo under the bin dir :" - + utils.list_to_label(candidate_binaries_dirs) - ) - # On a trouve un repertoire des binaires - else: - assert len(candidate_binaries_dirs) == 1 - actual_tool_binaries_dir = candidate_binaries_dirs[0] - - # Infos sur l'alias, pour les messages d'erreur - alias_info = ( - " (used for alias '" - + tool_binaries_dir - + "' on platform " - + current_platform - + ")" - ) - # Cas d'un repertoire des binaires specifie directement - else: - actual_tool_binaries_dir = os.path.realpath(tool_binaries_dir) - assert actual_tool_binaries_dir != "" or error_message != "" - - # Test qu'il s'agit bien d'un repertoire - if error_message == "" and not os.path.isdir(actual_tool_binaries_dir): - error_message = ( - tool_name - + " binary 
" - + actual_tool_binaries_dir - + " dir" - + alias_info - + " is not a valid directory" - ) - - # Construction du path du binaire de l'outil - if error_message == "": - tool_exe_name = kht.TOOL_EXE_NAMES[tool_name] - if current_platform == "Windows": - tool_exe_name += ".exe" - tool_exe_path = os.path.join(actual_tool_binaries_dir, tool_exe_name) - if not os.path.isfile(tool_exe_path): - tool_exe_path = None - # si le binaire n'existe pas, c'est peut-etre un binaire parallele qui a un suffixe - if tool_name in kht.PARALLEL_TOOL_NAMES: - tool_with_suffixes = [] - tested_binaries_name = [] - # construction de la liste des binaires avec suffixe qui sont presents dans le repertoire bin - for suffix in kht.TOOL_MPI_SUFFIXES: - tool_exe_name = kht.TOOL_EXE_NAMES[tool_name] + suffix - if platform == "Windows": - tool_exe_name += ".exe" - tested_binaries_name.append(tool_exe_name) - tool_exe_path = os.path.join( - actual_tool_binaries_dir, tool_exe_name - ) - if os.path.isfile(tool_exe_path): - tool_with_suffixes.append(tool_exe_path) - # Si il y en a plusieurs ou aucun, il y a une erreur - if len(tool_with_suffixes) == 0: - tool_exe_path = None - tool_full_name = "" - for name in tested_binaries_name: - tool_full_name += name + " " - tool_full_name += kht.TOOL_EXE_NAMES[tool_name] - error_message = ( - "no binaries found for " - + tool_name - + " (" - + tool_full_name.rstrip() - + ") in " - + actual_tool_binaries_dir - + alias_info - ) - elif len(tool_with_suffixes) > 1: - tool_exe_path = None - conflict_names = "" - for name in tool_with_suffixes: - conflict_names += os.path.basename(name) + " " - error_message = ( - "multiple binaries found for " - + tool_name - + " (" - + conflict_names.rstrip() - + ") in " - + actual_tool_binaries_dir - + alias_info - ) - else: - tool_exe_path = tool_with_suffixes[0] - # Message d'erreur par defaut - if tool_exe_path == None and error_message == "": - error_message = ( - tool_name - + " binary (" - + tool_exe_name - + ") not found in 
tool binaries dir " - + actual_tool_binaries_dir - + alias_info - ) - return tool_exe_path, error_message - - -def evaluate_tool_on_test_dir( - tool_exe_path, - suite_dir, - test_dir_name, - min_test_time=None, - max_test_time=None, - test_timeout_limit=None, - task_file=False, - output_scenario=False, - user_interface=False, -): - """Evaluation d'un outil sur un repertoire de test terminal et comparaison des resultats - Parametres: - - tool_exe_path: path de l'outil a tester, ou nul si on ne veut faire que la comparaison - - suite_dir: repertoire racine du repertoire de test - - test_dir_name: repertoire de test terminal""" - - # Verification du chemin de l'exe - if tool_exe_path != kht.ALIAS_CHECK: - if not os.path.isfile(tool_exe_path): - utils.fatal_error("tool path : " + tool_exe_path + " is not correct") - - # Verification de l'integrite du repertoire de test - test_dir = os.path.join(suite_dir, test_dir_name) - utils.check_test_dir(test_dir) - - # Extraction des repertoires principaux - suite_dir_name = utils.dir_name(suite_dir) - tool_dir_name = utils.parent_dir_name(suite_dir, 1) - - # Nom de l'outil - tool_name = kht.TOOL_NAMES_PER_DIR_NAME.get(tool_dir_name) - - # Recherche du chemin de l'executable et positionnement du path pour l'exe et la dll - tool_exe_dir = os.path.dirname(tool_exe_path) - if os.name == "nt": - initial_path = os.getenv("path") - os.environ["path"] = tool_exe_dir + ";" + os.getenv("path") - else: - initial_path = os.getenv("LD_LIBRARY_PATH", "") - os.environ["LD_LIBRARY_PATH"] = ( - tool_exe_dir + ":" + os.getenv("LD_LIBRARY_PATH", "") - ) - - # On se met dans le repertoire de test - os.chdir(test_dir) - - # Recherche du contexte parallele - tool_process_number = results.process_number - if tool_name not in kht.PARALLEL_TOOL_NAMES: - tool_process_number = 1 - - # Affichage du debut des tests ou de la comparaison - action_name = "Test" - exe_path_info = "\n exe: " + tool_exe_path - if tool_exe_path == kht.ALIAS_CHECK: - action_name = 
"Comparison" - exe_path_info = "" - print( - "starting " - + action_name - + " " - + tool_dir_name - + " " - + suite_dir_name - + " " - + test_dir_name - + " (processes: " - + str(tool_process_number) - + ", platform: " - + results.get_context_platform_type() - + ")" - + exe_path_info - ) - - # Lancement des tests - if tool_exe_path != kht.ALIAS_CHECK: - # Recherche du nom du l'executable Khiops (sans l'extension) - tool_exe_full_name, _ = os.path.splitext(os.path.basename(tool_exe_path)) - - # ... et sans le suffixe mpi - tool_exe_name = utils.extract_tool_exe_name(tool_exe_full_name) - - # Recherche du nom de l'outil correspondant - if tool_exe_name not in kht.TOOL_EXE_NAMES.values(): - utils.fatal_error( - "tool exe " - + tool_exe_name - + " from " - + tool_exe_path - + " should be in " - + utils.list_to_label(kht.TOOL_EXE_NAMES.values()) - ) - tool_name_per_exe_name = kht.TOOL_NAMES_PER_EXE_NAME.get(tool_exe_name) - if tool_name_per_exe_name != tool_name: - utils.fatal_error( - "Tool exe " - + tool_exe_path - + " inconsistent with tool directory " - + tool_dir_name - ) - - # Recherche du temps des resultats de reference dans le fichier de temps - results_ref_test_time = results.get_results_ref_dir_time(test_dir) - - # Arret si test trop long ou trop court - if not results.is_results_ref_dir_time_selected( - test_dir, min_test_time, max_test_time - ): - print( - test_dir_name - + " test not launched (test time: " - + str(results_ref_test_time) - + ")\n" - ) - return - - # Nettoyage du repertoire de resultats - results_dir = os.path.join(test_dir, kht.RESULTS) - if os.path.isdir(results_dir): - for file_name in os.listdir(results_dir): - file_path = os.path.join(results_dir, file_name) - utils.remove_file(file_path) - else: - # Creation du repertoire de resultats - # (on n'est pas sur que khiops cree le repertoire, par exemple en cas de Segmentation - # fault et si il n'existe pas on ne pourra pas ecrire dedans...) 
- os.mkdir(results_dir) - - # khiops en mode expert via une variable d'environnement - os.environ[kht.KHIOPS_EXPERT_MODE] = "true" - - # khiops en mode HardMemoryLimit via une variable d'environnement pour provoquer - # un plantage physique de l'allocateur en cas de depassement des contraintes memoires des scenarios - os.environ[kht.KHIOPS_HARD_MEMORY_LIMIT_MODE] = "true" - - # khiops en mode crash test via une variable d'environnement - os.environ[kht.KHIOPS_CRASH_TEST_MODE] = "true" - - # Ajout de variables d'environements propres a OpenMPI, elles remplacent les parametres - # on peut ansi lancer indiferemment mpich ou openmpi - if platform.system() == "Linux": - # Supprime les traces en cas d'erreur fatale de khiops. Option --quiet - os.environ["OMPI_MCA_orte_execute_quiet"] = "true" - - # permet de lancer plus de processus qu'il n'y a de coeurs. Option --oversubscribe - os.environ["OMPI_MCA_rmaps_base_oversubscribe"] = "true" - - # permet de lancer en tant que root. Option --allow-run-as-root - os.environ["OMPI_ALLOW_RUN_AS_ROOT"] = "1" - os.environ["OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"] = "1" - - # Ajout de variables d'environements propres a OpenMPI, elles remplacent les parametres - if platform.system() == "Linux": - # Supprime les traces en cas d'erreur fatale de khiops. Option --quiet - os.environ["OMPI_MCA_orte_execute_quiet"] = "true" - # permet de lancer plus de processus qu'il n'y a de coeurs. Option --oversubscribe - os.environ["OMPI_MCA_rmaps_base_oversubscribe"] = "true" - # permet de lancer en tant que root. 
Option --allow-run-as-root - os.environ["OMPI_ALLOW_RUN_AS_ROOT"] = "1" - os.environ["OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"] = "1" - - # Construction des parametres - khiops_params = [] - if tool_process_number > 1: - khiops_params.append(mpi_exe_name) - # Option -l, specifique a mpich, valide au moins pour Windows: - # "Label standard out and standard error (stdout and stderr) with the rank of the process" - if platform.system() == "Windows": - khiops_params.append("-l") - if platform.system() == "Darwin": - khiops_params.append("-host") - khiops_params.append("localhost") - khiops_params.append("-n") - khiops_params.append(str(tool_process_number)) - khiops_params.append(tool_exe_path) - if not user_interface: - khiops_params.append("-b") - khiops_params.append("-i") - khiops_params.append(kht.TEST_PRM) - khiops_params.append("-e") - khiops_params.append(os.path.join(results_dir, kht.ERR_TXT)) - if output_scenario: - khiops_params.append("-o") - khiops_params.append(os.path.join(results_dir, "output_test.prm")) - if task_file: - khiops_params.append("-p") - khiops_params.append(os.path.join(results_dir, "task_progression.log")) - - # Calcul d'un time_out en fonction du temps de reference, uniquement si celui est disponible - timeout = None - if results_ref_test_time is not None: - if test_timeout_limit is None: - test_timeout_limit = kht.MIN_TIMEOUT - timeout = test_timeout_limit + kht.TIMEOUT_RATIO * results_ref_test_time - - # Lancement de khiops - timeout_expiration_lines = [] - overall_time_start = time.time() - for run_number in range(kht.MAX_RUN_NUMBER): - run_completed = True - time_start = time.time() - with subprocess.Popen( - khiops_params, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ) as khiops_process: - try: - stdout, stderr = khiops_process.communicate(timeout=timeout) - except subprocess.TimeoutExpired: - run_completed = False - khiops_process.kill() - stdout, stderr = 
khiops_process.communicate() - time_stop = time.time() - # Memorisation du probleme en cas d'echec - if not run_completed: - killing_time = time_stop - time_start - results_ref_test_time_info = "" - if results_ref_test_time is not None: - results_ref_test_time_info = ( - " (reference time=" - + "{:.1f}".format(results_ref_test_time) - + "s)" - ) - timeout_expiration_lines.append( - "Trial " - + str(run_number + 1) - + " : process killed after " - + "{:.1f}".format(killing_time) - + "s" - + results_ref_test_time_info - ) - # Arret si ok - if run_completed: - break - # Arret si on a depense globalement trop de temps - overall_time = time_stop - overall_time_start - if overall_time > kht.MAX_TIMEOUT and run_number < kht.MAX_RUN_NUMBER - 1: - timeout_expiration_lines.append( - "No more trial: overall trial time is " - + "{:.1f}".format(overall_time) - + "s (limit=" - + "{:.1f}".format(kht.MAX_TIMEOUT) - + "s)" - ) - break - overall_time_stop = time.time() - - # Memorisation des infos sur les run en cas de timeout - if len(timeout_expiration_lines) > 0: - with open( - os.path.join(results_dir, kht.PROCESS_TIMEOUT_ERROR_LOG), - "w", - errors="ignore", - ) as timeout_file: - for line in timeout_expiration_lines: - timeout_file.write(line + "\n") - - # En cas d'anomalie, memorisation du contenu des sorties standard - if stdout != "": - # Affichage sur la console, utile par exemple en mode debug pour avoir les stats memoire - print(stdout) - - # Pretraitement des lignes pour supprimer les lignes normales - # parfois specifiques a certains outils - is_kni = kht.KNI in tool_exe_path - is_coclustering = kht.COCLUSTERING in tool_exe_path - lines = stdout.split("\n") - lines = utils.filter_process_id_prefix_from_lines( - lines - ) # Suppression de l'eventuel prefix de type '[0] ' - lines = utils.filter_copyright_lines( - lines - ) # Suppression eventuelle des lignes de copyright - lines = utils.filter_empty_lines(lines) # Suppression des lignes vides - - # Pour les test KNI, le 
stdout contient une ligne avec le nombre de records - if is_kni: - lines = utils.filter_lines_with_pattern( - lines, ["Recoded record number:"] - ) - lines = utils.filter_lines_with_pattern( - lines, ["Error : Finish opening stream error:"] - ) - # Cas particulier du coclustering en mode debug - if is_coclustering: - lines = utils.filter_lines_with_pattern( - lines, ["BEWARE: Optimization level set to 0 in debug mode only!!!"] - ) - # Exception egalement pour cas des lancement en mode parallele simule - lines = utils.filter_lines_with_pattern( - lines, ["Warning : simulated parallel mode"] - ) - # Exception en mode debug, pour les stats memoire - if "Memory stats (number of pointers, and memory space)" in stdout: - ok = True - # Parcours des lignes pour voir si ce sont bien des messages de stats, y compris en parallel - # En parallele, on a l'id du process entre crochets en tete de chaque ligne - for line in lines: - # Ok si ligne vide - if line == "": - ok = True - # Recherche d'un pattern de message de l'allocateur - else: - ok = ( - "Memory stats (number of pointers, and memory space)" - in line - or "Alloc: " in line - or "Requested: " in line - ) - # Recherche additionnelle de "Process " en tete de ligne - # En effet, parfois en parallele, le debut d'un message commencant par "Process " - # est emis sur une ligne de stdout, et la fin sur une autre ligne - if not ok: - ok = line.find("Process ") >= 0 - if not ok: - break - else: - ok = len(lines) == 0 - if not ok: - try: - with open( - os.path.join(results_dir, kht.STDOUT_ERROR_LOG), - "w", - errors="ignore", - ) as stdout_file: - stdout_file.write(stdout) - except Exception as exception: - print( - "Enable to write file " - + kht.STDOUT_ERROR_LOG - + " in " - + kht.RESULTS - + " dir ", - exception, - ) - # Cas de la sortie d'erreur standard - if stderr != "": - print(stderr, file=sys.stderr) - try: - with open( - os.path.join(results_dir, kht.STDERR_ERROR_LOG), - "w", - errors="ignore", - ) as stderr_file: - 
stderr_file.write(stderr) - except Exception as exception: - print( - "Enable to write file " - + kht.STDERR_ERROR_LOG - + " in " - + kht.RESULTS - + " dir ", - exception, - ) - # Cas du code retour - if khiops_process.returncode != 0: - try: - with open( - os.path.join(results_dir, kht.RETURN_CODE_ERROR_LOG), - "w", - errors="ignore", - ) as return_code_file: - return_code_file.write( - "Wrong return code: " - + str(khiops_process.returncode) - + " (should be 0)" - ) - except Exception as exception: - print( - "Enable to write file " - + kht.RETURN_CODE_ERROR_LOG - + " in " - + kht.RESULTS - + " dir ", - exception, - ) - # Message de fin de test - print(tool_dir_name + " " + suite_dir_name + " " + test_dir_name + " test done") - - # Memorisation d'un fichier contenant le temp global - try: - with open( - os.path.join(results_dir, kht.TIME_LOG), - "w", - errors="ignore", - ) as time_file: - time_file.write(str(overall_time_stop - overall_time_start) + "\n") - except Exception as exception: - print( - "Enable to write file " + kht.TIME_LOG + " in " + kht.RESULTS + " dir ", - exception, - ) - - # Restore initial path - if os.name == "nt": - os.environ["path"] = initial_path - else: - os.environ["LD_LIBRARY_PATH"] = initial_path - - # Comparaison des resultats - os.chdir(suite_dir) - test_dir = os.path.join(suite_dir, test_dir_name) - check.check_results(test_dir) - - -def evaluate_tool_on_suite_dir(tool_exe_path, suite_dir, test_dir_name=None, **kwargs): - """Evaluation d'un outil sur une suite de test et comparaison des resultats - Parametres: - - tool_exe_path: path de l'outil a tester, ou nul si on ne veut faire que la comparaison - - suite_dir: repertoire de la suite de test - - test_dir_name: repertoire de test terminal""" - - # Erreur si repertoire de suite absent - if not os.path.isdir(suite_dir): - utils.fatal_error("missing directory for test suite " + suite_dir) - - # Collecte des sous-repertoire de test - test_list = [] - for file_name in 
os.listdir(suite_dir): - if os.path.isdir(os.path.join(suite_dir, file_name)): - test_list.append(file_name) - - # Erreur si pas de sous-repertoires - if len(test_list) == 0: - utils.fatal_error("no test dir is available in " + suite_dir) - - # Cas d'un repertoire de test specifique - if test_dir_name is not None: - evaluate_tool_on_test_dir(tool_exe_path, suite_dir, test_dir_name, **kwargs) - - # Cas de tous les sous-repertoires - else: - for name in test_list: - evaluate_tool_on_test_dir(tool_exe_path, suite_dir, name, **kwargs) - # Message global - suite_dir_name = utils.dir_name(suite_dir) - tool_dir_name = utils.parent_dir_name(suite_dir, 1) - action_name = "TEST" - if tool_exe_path == "nul": - action_name = "COMPARISON" - print(action_name + " DONE\t" + tool_dir_name + "\t" + suite_dir_name) - - -def evaluate_tool(tool_name, tool_exe_path, home_dir, test_suites, **kwargs): - """Lance les tests d'un outil sur un ensemble de suites de tests""" - assert tool_name in kht.TOOL_NAMES - assert utils.check_home_dir(home_dir) - # Recherche du repertoire lie a l'outil - tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] - # Lancement des tests sur les repertoires valides - for suite_dir_name in test_suites: - suite_dir = os.path.join( - home_dir, - tool_dir_name, - suite_dir_name, - ) - if os.path.isdir(suite_dir): - print("\n\n--------------------------------------------------------") - print("\tRunning " + tool_name + " " + suite_dir_name + " tests") - print("--------------------------------------------------------") - suite_dir = os.path.join( - home_dir, - tool_dir_name, - suite_dir_name, - ) - evaluate_tool_on_suite_dir(tool_exe_path, suite_dir, **kwargs) - - -def evaluate_all_tools_on_learning_test_tree( - home_dir, - input_tool_dir_name, - input_suite_dir_name, - input_test_dir_name, - binaries_dir, - family, - **kwargs -): - """Lance les tests des outils un ensemble de suites de tests - Tout ou partie de l'arborescence est prise en compte selon la specification - 
des operandes tool_dir_name, suite_dir_name, test_dir_name, qui peuvent etre None sinon. - - home_dir: repertoire principal de l'aborescence source - - tool_dir_name, suite_dir_name, test_dir_name: pour ne prendre en compte qu'une sous-partie - de l'arborescence source si ces oprande ne sont pas None - - binaries_dir: repertorie des executables des outils - - family: famille utilise pour choisir la sous-partie des suites a exporter - - kwargs: argument optionnels de la ligne de commande - """ - # Tous les outils sont a prendre en compte si on est a la racine - if input_tool_dir_name is None: - used_tool_names = kht.TOOL_NAMES - # Sinon, seul l'outil correspondant au tool dir est a tester - else: - tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] - used_tool_names = [tool_name] - - # Parcours des repertoires des outils verifier les repertoires de suite et nettoyer les resultats - suite_errors = False - for tool_name in used_tool_names: - tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] - tool_exe_path, error_message = build_tool_exe_path(binaries_dir, tool_name) - # Recherche des suites a utiliser - if input_suite_dir_name is not None: - assert tool_dir_name is not None - suite_dir_names = [input_suite_dir_name] - elif family == test_families.ALL: - suite_dir_names = utils.sub_dirs(os.path.join(home_dir, tool_dir_name)) - else: - suite_dir_names = test_families.FAMILY_TEST_SUITES[family, tool_name] - # Parcours des suites de la famille - for suite_dir_name in suite_dir_names: - suite_dir = os.path.join( - home_dir, - tool_dir_name, - suite_dir_name, - ) - if os.path.isdir(suite_dir): - # Repertoires de test a utiliser - if input_test_dir_name is not None: - test_dir_names = [input_test_dir_name] - else: - test_dir_names = os.listdir(suite_dir) - # Parcours des repertoires de test de la suite - for test_dir_name in test_dir_names: - # Nettoyage sauf si seulement comparaisons des resultats - if tool_exe_path != kht.ALIAS_CHECK: - test_dir = 
os.path.join(suite_dir, test_dir_name) - if os.path.isdir(test_dir): - # Nettoyage uniquement si test compatible avec les contraIntes de temps - if results.is_results_ref_dir_time_selected( - test_dir, - kwargs["min_test_time"], - kwargs["max_test_time"], - ): - file_path = os.path.join( - test_dir, kht.COMPARISON_RESULTS_LOG - ) - if os.path.isfile(file_path): - utils.remove_file(file_path) - results_dir = os.path.join(test_dir, kht.RESULTS) - if os.path.isdir(results_dir): - for file_name in os.listdir(results_dir): - file_path = os.path.join(results_dir, file_name) - utils.remove_file(file_path) - utils.remove_dir(results_dir) - # Message d'erreur si suite inexistante - else: - if not suite_errors: - print("") - suite_errors = True - print("error : suite directory not found: " + suite_dir) - - # Cas d'un seul outil avec un repertoire de suite au de test specifique - # Dans ce cas, on ignore la famille - if input_suite_dir_name is not None: - assert input_tool_dir_name is not None - tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] - tool_exe_path, error_message = build_tool_exe_path(binaries_dir, tool_name) - if tool_exe_path is None: - utils.fatal_error(error_message) - suite_dir = os.path.join(home_dir, input_tool_dir_name, input_suite_dir_name) - evaluate_tool_on_suite_dir( - tool_exe_path, - suite_dir, - input_test_dir_name, - **kwargs, - ) - # Cas d'un ou plusieurs outils, ou il faut utiliser les suites de la famille specifiee - else: - # Tous les outils sont a prendre en compte si on est a la racine - if input_tool_dir_name is None: - used_tool_names = kht.TOOL_NAMES - # Sinon, seul l'outil correspondant au tool dir est a tester - else: - tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] - used_tool_names = [tool_name] - - # Parcours des outils a evaluer - for tool_name in used_tool_names: - tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] - if family == test_families.ALL: - test_suites = utils.sub_dirs(os.path.join(home_dir, 
tool_dir_name)) - else: - test_suites = test_families.FAMILY_TEST_SUITES[family, tool_name] - # On ne teste la validite de l'exe que s'il y au moins une suite dans la famille - if len(test_suites) > 0: - # On sort avec un message d'erreur si l'exe n'esiste pas - # Cela n'est fait a priori sur tous les outils - # Cela permet de lancer un test complet sur une famille, meme si l'exe de KNI - # (exploite en dernier) n'est pas disponible - tool_exe_path, error_message = build_tool_exe_path( - binaries_dir, tool_name - ) - if tool_exe_path is None: - utils.fatal_error(error_message) - evaluate_tool( - tool_name, - tool_exe_path, - home_dir, - test_suites, - **kwargs, - ) - - -def main(): - """Fonction principale de lancement d'un test""" - - def build_usage_help( - help_command, - help_binary_dir, - help_tool_dir_name=None, - help_suite_dir_name=None, - help_test_dir_name=None, - help_options=None, - ): - """Construction d'une ligne d'aide pour un usage de la commande test""" - source_dir = os.path.join(".", kht.LEARNING_TEST) - if help_test_dir_name is not None: - source_dir = os.path.join( - source_dir, help_tool_dir_name, help_suite_dir_name, help_test_dir_name - ) - elif help_suite_dir_name is not None: - source_dir = os.path.join( - source_dir, help_tool_dir_name, help_suite_dir_name - ) - elif help_tool_dir_name is not None: - source_dir = os.path.join(source_dir, help_tool_dir_name) - usage_help = help_command + " " + source_dir + " " + help_binary_dir - if help_options is not None: - usage_help += " " + help_options - return usage_help - - # Nom du script - script_file_name = os.path.basename(__file__) - script_name = os.path.splitext(script_file_name)[0] - - # Ajout d'exemples d'utilisation - epilog = "" - epilog += "Usage examples" - epilog += "\n " + build_usage_help(script_name, "r", help_options="-p 4") - epilog += "\n " + build_usage_help( - script_name, - '"C:\\Program Files\\khiops\\bin"', - kht.TOOL_DIR_NAMES[kht.KHIOPS], - "Standard", - "Iris", - ) - 
epilog += "\n " + build_usage_help( - script_name, - "d", - kht.TOOL_DIR_NAMES[kht.COCLUSTERING], - help_options="--max-test-time 5 --test-timeout-limit 1000", - ) - epilog += "\n " + build_usage_help(script_name, "check", help_options="-f basic") - - # Parametrage de l'analyse de la ligne de commande - parser = argparse.ArgumentParser( - prog=script_name, - description="test a tool on a subset of test dirs", - epilog=epilog, - formatter_class=utils.get_formatter_class(script_name), - ) - - # Arguments positionnels - utils.argument_parser_add_source_argument(parser) - parser.add_argument( - "binaries", - help="tool binaries dir," - " or one of the following aliases:\n" - " r, d: release or debug binary dir in developpement environnement\n" - " check: for comparison of test and reference results only\n", - ) - - # Arguments optionnels standards - utils.argument_parser_add_family_argument(parser) - utils.argument_parser_add_processes_argument(parser) - utils.argument_parser_add_forced_platform_argument(parser) - utils.argument_parser_add_limit_test_time_arguments(parser) - - # Temps de gestion d'un timeout - parser.add_argument( - "--test-timeout-limit", - help="kill overlengthy process exeeding timeout limit", - type=float, - metavar="t", - action="store", - ) - - # Mode avec fichier de tache - parser.add_argument( - "--task-file", - help="create a task progression file task_progression.log in results dir", - action="store_true", - ) - - # Mode avec scenario en sortie - parser.add_argument( - "--output-scenario", - help="create an output scenario output_test.prm in results dir", - action="store_true", - ) - - # Mode interface utilisateur - parser.add_argument( - "--user-interface", - help="run in user interface mode" - " (path to java and classpath with norm.jar must be defined)", - action="store_true", - ) - - # Analyse de la ligne de commande - args = parser.parse_args() - - # Verifications supplementaires des arguments - # On nomme les arguments concerne de la 
meme facon que pour le comportement par defaut - # des controles automatiques du parser - # Le rappel des noms des arguments est redondant avec la definition des arguments ajoutes, - # mais ce n'est pas trop lourd a maintenir - # (il n'y a pas d'api officielle d'introspection de la classe argparse) - - # Verification de l'argument source - ( - home_dir, - tool_dir_name, - suite_dir_name, - test_dir_name, - ) = utils.argument_parser_check_source_argument(parser, args.source) - - # Verification des arguments optionnels - utils.argument_parser_check_processes_argument(parser, args.n) - utils.argument_parser_check_limit_test_time_arguments( - parser, args.min_test_time, args.max_test_time - ) - if args.test_timeout_limit is not None and args.test_timeout_limit < 0: - parser.error("argument --test-timeout-limit must be positive") - - # Echec si le nombre de processus est parametre et mpiexec n'est pas dans le path - if args.n > 1 and shutil.which(mpi_exe_name) is None: - parser.error( - "argument -p/--processes: process number " - + str(args.n) - + " is greater than 1 but mpiexec not found in path." 
- ) - - # Echec si on est en mode interactif des elements de configuration minimaux sont absents - if args.user_interface: - # Pour l'instant, verification uniquement sous Windows - current_platform = results.get_context_platform_type() - if current_platform == "Windows": - # Verification de a presence de Java - # Ne suffit pas pour verifier que jvm.dll est dans le path, mais c'est deja ca - path_to_java = shutil.which("java.exe") - if path_to_java is None: - parser.error( - "argument --user-interface is set but Java not found in path" - ) - # Verification de la presence de norm.jar dans le classpath - classpath = os.getenv("classpath") - if classpath is None or "norm.jar" not in classpath: - parser.error( - "argument --user-interface is set but 'norm.jar' not found in classpath" - ) - - # Memorisation des variables globales de gestion du contexte des resultats de reference - results.process_number = args.n - results.forced_platform = args.forced_platform - - # Lancement de la commande - evaluate_all_tools_on_learning_test_tree( - home_dir, - tool_dir_name, - suite_dir_name, - test_dir_name, - args.binaries, - args.family, - min_test_time=args.min_test_time, - max_test_time=args.max_test_time, - test_timeout_limit=args.test_timeout_limit, - task_file=args.task_file, - output_scenario=args.output_scenario, - user_interface=args.user_interface, - ) - - -if __name__ == "__main__": - utils.set_flushed_outputs() - main() +# Copyright (c) 2024 Orange. All rights reserved. +# This software is distributed under the BSD 3-Clause-clear License, the text of which is available +# at https://spdx.org/licenses/BSD-3-Clause-Clear.html or see the "LICENSE" file for more details. 
import os.path
import sys
import platform
import shutil
import subprocess
import time
import argparse

import _kht_constants as kht
import _kht_utils as utils
import _kht_families as test_families
import _kht_results_management as results
import _kht_check_results as check

# Name of the MPI launcher executable, which depends on the OS
# (mpiexec on Windows, mpirun elsewhere)
if os.name == "nt":
    mpi_exe_name = "mpiexec.exe"
else:
    mpi_exe_name = "mpirun"


def build_tool_exe_path(tool_binaries_dir, tool_name):
    """Build the path of a tool executable from the binaries directory

    The first parameter may contain several kinds of values:
    - a directory expected to contain the binaries of the tool under test
    - 'r' or 'd': aliases for the release or debug binaries directory
      of the development environment
    - 'check': to perform only the comparison between test and reference results

    Returns a tuple:
    - the full path of the tool binary if a directory was given,
      'check' for comparison-only mode, or None on error
    - the error message in case of error ("" otherwise)
    """
    assert tool_name in kht.TOOL_NAMES
    tool_exe_path = None
    error_message = ""
    # Special case of the comparison-only mode
    # (kht.ALIAS_CHECK is used for consistency with the callers, which
    # compare the returned value against it)
    if tool_binaries_dir == kht.ALIAS_CHECK:
        return kht.ALIAS_CHECK, error_message

    # Search for the binaries directory of the development environment
    alias_info = ""
    current_platform = results.get_context_platform_type()
    assert current_platform in kht.RESULTS_REF_TYPE_VALUES[kht.PLATFORM]
    # Case of an alias referring to the development environment binaries
    actual_tool_binaries_dir = ""
    if tool_binaries_dir in [kht.ALIAS_D, kht.ALIAS_R]:
        script_path = __file__
        # Directory where the build products are generated
        build_dir = os.path.realpath(
            os.path.join(
                script_path,
                "..",
                "..",
                "..",
                "..",
                "build",
            )
        )
        # Suffix of the directory name containing the binaries
        searched_suffix = ""
        if tool_binaries_dir == kht.ALIAS_D:
            searched_suffix = "-debug"
        elif tool_binaries_dir == kht.ALIAS_R:
            searched_suffix = "-release"
        # Search sub-directories ending with the suffix and containing a bin sub-directory
        candidate_binaries_dirs = []
        if os.path.isdir(build_dir):
            for name in os.listdir(build_dir):
                # endswith is equivalent to the original
                # 'suffix in name and name[-len(suffix):] == suffix' check
                if name.endswith(searched_suffix):
                    binaries_dir = os.path.join(build_dir, name, "bin")
                    if os.path.isdir(binaries_dir):
                        candidate_binaries_dirs.append(binaries_dir)
        # Error if no binaries directory was found
        if len(candidate_binaries_dirs) == 0:
            error_message = (
                "Tool binaries dir for alias '"
                + tool_binaries_dir
                + "' not found in current khiops repo under the bin dir "
                + build_dir
            )
        # Error if several binaries directories were found
        elif len(candidate_binaries_dirs) > 1:
            error_message = (
                "Multiple tool binaries dir found for alias '"
                + tool_binaries_dir
                + "' in current khiops repo under the bin dir :"
                + utils.list_to_label(candidate_binaries_dirs)
            )
        # A single binaries directory was found
        else:
            assert len(candidate_binaries_dirs) == 1
            actual_tool_binaries_dir = candidate_binaries_dirs[0]

        # Alias information, used in error messages
        alias_info = (
            " (used for alias '"
            + tool_binaries_dir
            + "' on platform "
            + current_platform
            + ")"
        )
    # Case of a binaries directory given directly
    else:
        actual_tool_binaries_dir = os.path.realpath(tool_binaries_dir)
    assert actual_tool_binaries_dir != "" or error_message != ""

    # Check that it is indeed a directory
    if error_message == "" and not os.path.isdir(actual_tool_binaries_dir):
        error_message = (
            tool_name
            + " binary "
            + actual_tool_binaries_dir
            + " dir"
            + alias_info
            + " is not a valid directory"
        )

    # Build the path of the tool binary
    if error_message == "":
        tool_exe_name = kht.TOOL_EXE_NAMES[tool_name]
        if current_platform == "Windows":
            tool_exe_name += ".exe"
        tool_exe_path = os.path.join(actual_tool_binaries_dir, tool_exe_name)
        if not os.path.isfile(tool_exe_path):
            tool_exe_path = None
            # If the binary does not exist, it may be a parallel binary with an MPI suffix
            if tool_name in kht.PARALLEL_TOOL_NAMES:
                tool_with_suffixes = []
                tested_binaries_name = []
                # Build the list of suffixed binaries present in the bin directory
                for suffix in kht.TOOL_MPI_SUFFIXES:
                    tool_exe_name = kht.TOOL_EXE_NAMES[tool_name] + suffix
                    # Fix: compare the platform *string*, not the 'platform' module
                    # (the original 'platform == "Windows"' was always False, so the
                    # '.exe' extension was never appended for suffixed binaries)
                    if current_platform == "Windows":
                        tool_exe_name += ".exe"
                    tested_binaries_name.append(tool_exe_name)
                    tool_exe_path = os.path.join(
                        actual_tool_binaries_dir, tool_exe_name
                    )
                    if os.path.isfile(tool_exe_path):
                        tool_with_suffixes.append(tool_exe_path)
                # Error if there are none or several of them
                if len(tool_with_suffixes) == 0:
                    tool_exe_path = None
                    tool_full_name = ""
                    for name in tested_binaries_name:
                        tool_full_name += name + " "
                    tool_full_name += kht.TOOL_EXE_NAMES[tool_name]
                    error_message = (
                        "no binaries found for "
                        + tool_name
                        + " ("
                        + tool_full_name.rstrip()
                        + ") in "
                        + actual_tool_binaries_dir
                        + alias_info
                    )
                elif len(tool_with_suffixes) > 1:
                    tool_exe_path = None
                    conflict_names = ""
                    for name in tool_with_suffixes:
                        conflict_names += os.path.basename(name) + " "
                    error_message = (
                        "multiple binaries found for "
                        + tool_name
                        + " ("
                        + conflict_names.rstrip()
                        + ") in "
                        + actual_tool_binaries_dir
                        + alias_info
                    )
                else:
                    tool_exe_path = tool_with_suffixes[0]
        # Default error message ('is None' instead of '== None')
        if tool_exe_path is None and error_message == "":
            error_message = (
                tool_name
                + " binary ("
                + tool_exe_name
                + ") not found in tool binaries dir "
                + actual_tool_binaries_dir
                + alias_info
            )
    return tool_exe_path, error_message
def evaluate_tool_on_test_dir(
    tool_exe_path,
    suite_dir,
    test_dir_name,
    min_test_time=None,
    max_test_time=None,
    test_timeout_limit=None,
    task_file=False,
    output_scenario=False,
    user_interface=False,
):
    """Run a tool on a terminal test directory and compare the results
    Parameters:
    - tool_exe_path: path of the tool under test, or kht.ALIAS_CHECK to only compare results
    - suite_dir: root directory of the test suite
    - test_dir_name: terminal test directory
    - min_test_time, max_test_time: only run tests whose reference time lies in this range
    - test_timeout_limit: base time used to kill over-lengthy processes
    - task_file: create a task progression file task_progression.log in the results dir
    - output_scenario: create an output scenario output_test.prm in the results dir
    - user_interface: run in user interface mode instead of batch mode"""

    # Check the executable path
    if tool_exe_path != kht.ALIAS_CHECK:
        if not os.path.isfile(tool_exe_path):
            utils.fatal_error("tool path : " + tool_exe_path + " is not correct")

    # Check the integrity of the test directory
    test_dir = os.path.join(suite_dir, test_dir_name)
    utils.check_test_dir(test_dir)

    # Extraction of the main directories
    suite_dir_name = utils.dir_name(suite_dir)
    tool_dir_name = utils.parent_dir_name(suite_dir, 1)

    # Name of the tool
    tool_name = kht.TOOL_NAMES_PER_DIR_NAME.get(tool_dir_name)

    # Put the executable directory in the path, for the exe and its dll
    tool_exe_dir = os.path.dirname(tool_exe_path)
    if os.name == "nt":
        initial_path = os.getenv("path")
        os.environ["path"] = tool_exe_dir + ";" + os.getenv("path")
    else:
        initial_path = os.getenv("LD_LIBRARY_PATH", "")
        os.environ["LD_LIBRARY_PATH"] = (
            tool_exe_dir + ":" + os.getenv("LD_LIBRARY_PATH", "")
        )

    # Move to the test directory
    os.chdir(test_dir)

    # Parallel context: only parallel tools use more than one process
    tool_process_number = results.process_number
    if tool_name not in kht.PARALLEL_TOOL_NAMES:
        tool_process_number = 1

    # Print the start of the test or of the comparison
    action_name = "Test"
    exe_path_info = "\n exe: " + tool_exe_path
    if tool_exe_path == kht.ALIAS_CHECK:
        action_name = "Comparison"
        exe_path_info = ""
    print(
        "starting "
        + action_name
        + " "
        + tool_dir_name
        + " "
        + suite_dir_name
        + " "
        + test_dir_name
        + " (processes: "
        + str(tool_process_number)
        + ", platform: "
        + results.get_context_platform_type()
        + ")"
        + exe_path_info
    )

    # Run the test
    if tool_exe_path != kht.ALIAS_CHECK:
        # Name of the Khiops executable (without the extension)
        tool_exe_full_name, _ = os.path.splitext(os.path.basename(tool_exe_path))

        # ... and without the mpi suffix
        tool_exe_name = utils.extract_tool_exe_name(tool_exe_full_name)

        # Name of the corresponding tool
        if tool_exe_name not in kht.TOOL_EXE_NAMES.values():
            utils.fatal_error(
                "tool exe "
                + tool_exe_name
                + " from "
                + tool_exe_path
                + " should be in "
                + utils.list_to_label(kht.TOOL_EXE_NAMES.values())
            )
        tool_name_per_exe_name = kht.TOOL_NAMES_PER_EXE_NAME.get(tool_exe_name)
        if tool_name_per_exe_name != tool_name:
            utils.fatal_error(
                "Tool exe "
                + tool_exe_path
                + " inconsistent with tool directory "
                + tool_dir_name
            )

        # Time of the reference results, from the time file
        results_ref_test_time = results.get_results_ref_dir_time(test_dir)

        # Stop if the test is too long or too short
        if not results.is_results_ref_dir_time_selected(
            test_dir, min_test_time, max_test_time
        ):
            print(
                test_dir_name
                + " test not launched (test time: "
                + str(results_ref_test_time)
                + ")\n"
            )
            return

        # Clean the results directory
        results_dir = os.path.join(test_dir, kht.RESULTS)
        if os.path.isdir(results_dir):
            for file_name in os.listdir(results_dir):
                file_path = os.path.join(results_dir, file_name)
                utils.remove_file(file_path)
        else:
            # Create the results directory
            # (khiops may not create it, for example after a segmentation fault,
            # and without it nothing could be written below)
            os.mkdir(results_dir)

        # khiops in expert mode, via an environment variable
        os.environ[kht.KHIOPS_EXPERT_MODE] = "true"

        # khiops in HardMemoryLimit mode, via an environment variable, to trigger
        # a hard allocator failure when scenario memory constraints are exceeded
        os.environ[kht.KHIOPS_HARD_MEMORY_LIMIT_MODE] = "true"

        # khiops in crash test mode, via an environment variable
        os.environ[kht.KHIOPS_CRASH_TEST_MODE] = "true"

        # OpenMPI-specific environment variables, replacing command line options,
        # so that either mpich or openmpi can be launched indifferently
        if platform.system() == "Linux":
            # Suppress traces on khiops fatal errors. Option --quiet
            os.environ["OMPI_MCA_orte_execute_quiet"] = "true"

            # Allow launching more processes than cores. Option --oversubscribe
            os.environ["OMPI_MCA_rmaps_base_oversubscribe"] = "true"

        # Build the command line parameters
        khiops_params = []
        if tool_process_number > 1:
            khiops_params.append(mpi_exe_name)

            # Option -l, specific to mpich, valid at least on Windows:
            # "Label standard out and standard error (stdout and stderr) with the rank of the process"
            if platform.system() == "Windows":
                khiops_params.append("-l")
            if platform.system() == "Darwin":
                khiops_params.append("-host")
                khiops_params.append("localhost")

            # Option --allow-run-as-root, specific to OpenMPI, allows running
            # OpenMPI as root portably on all supported OSes. It replaces setting
            # the OMPI_ALLOW_RUN_AS_ROOT and OMPI_ALLOW_RUN_AS_ROOT_CONFIRM
            # environment variables to '1', which OpenMPI ignores on Debian 10.
            if platform.system() == "Linux":
                # Iterating over TOOL_MPI_SUFFIXES ensures that "_openmpi"
                # is always among the supported MPI back-ends
                for suffix in kht.TOOL_MPI_SUFFIXES:
                    if tool_exe_path.endswith(suffix) and suffix == "_openmpi":
                        khiops_params.append("--allow-run-as-root")
                        break
            khiops_params.append("-n")
            khiops_params.append(str(tool_process_number))
        khiops_params.append(tool_exe_path)
        if not user_interface:
            khiops_params.append("-b")
        khiops_params.append("-i")
        khiops_params.append(kht.TEST_PRM)
        khiops_params.append("-e")
        khiops_params.append(os.path.join(results_dir, kht.ERR_TXT))
        if output_scenario:
            khiops_params.append("-o")
            khiops_params.append(os.path.join(results_dir, "output_test.prm"))
        if task_file:
            khiops_params.append("-p")
            khiops_params.append(os.path.join(results_dir, "task_progression.log"))

        # Compute a timeout from the reference time, only when it is available
        timeout = None
        if results_ref_test_time is not None:
            if test_timeout_limit is None:
                test_timeout_limit = kht.MIN_TIMEOUT
            timeout = test_timeout_limit + kht.TIMEOUT_RATIO * results_ref_test_time

        # Launch khiops, retrying on timeout
        timeout_expiration_lines = []
        overall_time_start = time.time()
        for run_number in range(kht.MAX_RUN_NUMBER):
            run_completed = True
            time_start = time.time()
            with subprocess.Popen(
                khiops_params,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
            ) as khiops_process:
                try:
                    stdout, stderr = khiops_process.communicate(timeout=timeout)
                except subprocess.TimeoutExpired:
                    run_completed = False
                    khiops_process.kill()
                    stdout, stderr = khiops_process.communicate()
            time_stop = time.time()
            # Record the problem in case of failure
            if not run_completed:
                killing_time = time_stop - time_start
                results_ref_test_time_info = ""
                if results_ref_test_time is not None:
                    results_ref_test_time_info = (
                        " (reference time="
                        + "{:.1f}".format(results_ref_test_time)
                        + "s)"
                    )
                timeout_expiration_lines.append(
                    "Trial "
                    + str(run_number + 1)
                    + " : process killed after "
                    + "{:.1f}".format(killing_time)
                    + "s"
                    + results_ref_test_time_info
                )
            # Stop if ok
            if run_completed:
                break
            # Stop if overall too much time was spent
            overall_time = time_stop - overall_time_start
            if overall_time > kht.MAX_TIMEOUT and run_number < kht.MAX_RUN_NUMBER - 1:
                timeout_expiration_lines.append(
                    "No more trial: overall trial time is "
                    + "{:.1f}".format(overall_time)
                    + "s (limit="
                    + "{:.1f}".format(kht.MAX_TIMEOUT)
                    + "s)"
                )
                break
        overall_time_stop = time.time()

        # Record information about the runs in case of timeout
        if len(timeout_expiration_lines) > 0:
            with open(
                os.path.join(results_dir, kht.PROCESS_TIMEOUT_ERROR_LOG),
                "w",
                errors="ignore",
            ) as timeout_file:
                for line in timeout_expiration_lines:
                    timeout_file.write(line + "\n")

        # In case of anomaly, record the content of the standard outputs
        if stdout != "":
            # Print on the console, useful for example in debug mode to get the memory stats
            print(stdout)

            # Pre-process the lines to remove normal lines,
            # sometimes specific to certain tools
            is_kni = kht.KNI in tool_exe_path
            is_coclustering = kht.COCLUSTERING in tool_exe_path
            lines = stdout.split("\n")
            lines = utils.filter_process_id_prefix_from_lines(
                lines
            )  # Remove the potential '[0] ' process id prefix
            lines = utils.filter_copyright_lines(
                lines
            )  # Remove potential copyright lines
            lines = utils.filter_empty_lines(lines)  # Remove empty lines

            # For KNI tests, stdout contains a line with the record count
            if is_kni:
                lines = utils.filter_lines_with_pattern(
                    lines, ["Recoded record number:"]
                )
                lines = utils.filter_lines_with_pattern(
                    lines, ["Error : Finish opening stream error:"]
                )
            # Special case of coclustering in debug mode
            if is_coclustering:
                lines = utils.filter_lines_with_pattern(
                    lines, ["BEWARE: Optimization level set to 0 in debug mode only!!!"]
                )
            # Exception as well for runs in simulated parallel mode
            lines = utils.filter_lines_with_pattern(
                lines, ["Warning : simulated parallel mode"]
            )
            # Exception in debug mode, for the memory stats
            if "Memory stats (number of pointers, and memory space)" in stdout:
                ok = True
                # Check that the lines are indeed stats messages, including in parallel
                # (in parallel, each line starts with the process id between brackets)
                for line in lines:
                    # Ok if the line is empty
                    if line == "":
                        ok = True
                    # Search for an allocator message pattern
                    else:
                        ok = (
                            "Memory stats (number of pointers, and memory space)"
                            in line
                            or "Alloc: " in line
                            or "Requested: " in line
                        )
                        # Additional search for "Process " at the start of the line:
                        # in parallel, the beginning of a message starting with
                        # "Process " is sometimes emitted on one stdout line
                        # and its end on another
                        if not ok:
                            ok = line.find("Process ") >= 0
                    if not ok:
                        break
            else:
                ok = len(lines) == 0
            if not ok:
                try:
                    with open(
                        os.path.join(results_dir, kht.STDOUT_ERROR_LOG),
                        "w",
                        errors="ignore",
                    ) as stdout_file:
                        stdout_file.write(stdout)
                except Exception as exception:
                    # Fix: 'Unable' instead of 'Enable' in the error message
                    print(
                        "Unable to write file "
                        + kht.STDOUT_ERROR_LOG
                        + " in "
                        + kht.RESULTS
                        + " dir ",
                        exception,
                    )
        # Case of the standard error output
        if stderr != "":
            print(stderr, file=sys.stderr)
            try:
                with open(
                    os.path.join(results_dir, kht.STDERR_ERROR_LOG),
                    "w",
                    errors="ignore",
                ) as stderr_file:
                    stderr_file.write(stderr)
            except Exception as exception:
                print(
                    "Unable to write file "
                    + kht.STDERR_ERROR_LOG
                    + " in "
                    + kht.RESULTS
                    + " dir ",
                    exception,
                )
        # Case of the return code
        if khiops_process.returncode != 0:
            try:
                with open(
                    os.path.join(results_dir, kht.RETURN_CODE_ERROR_LOG),
                    "w",
                    errors="ignore",
                ) as return_code_file:
                    return_code_file.write(
                        "Wrong return code: "
                        + str(khiops_process.returncode)
                        + " (should be 0)"
                    )
            except Exception as exception:
                print(
                    "Unable to write file "
                    + kht.RETURN_CODE_ERROR_LOG
                    + " in "
                    + kht.RESULTS
                    + " dir ",
                    exception,
                )
        # End of test message
        print(tool_dir_name + " " + suite_dir_name + " " + test_dir_name + " test done")

        # Record a file containing the overall time
        try:
            with open(
                os.path.join(results_dir, kht.TIME_LOG),
                "w",
                errors="ignore",
            ) as time_file:
                time_file.write(str(overall_time_stop - overall_time_start) + "\n")
        except Exception as exception:
            print(
                "Unable to write file " + kht.TIME_LOG + " in " + kht.RESULTS + " dir ",
                exception,
            )

    # Restore initial path
    if os.name == "nt":
        os.environ["path"] = initial_path
    else:
        os.environ["LD_LIBRARY_PATH"] = initial_path

    # Compare the results
    os.chdir(suite_dir)
    test_dir = os.path.join(suite_dir, test_dir_name)
    check.check_results(test_dir)


def evaluate_tool_on_suite_dir(tool_exe_path, suite_dir, test_dir_name=None, **kwargs):
    """Run a tool on a test suite and compare the results
    Parameters:
    - tool_exe_path: path of the tool under test, or kht.ALIAS_CHECK to only compare results
    - suite_dir: directory of the test suite
    - test_dir_name: terminal test directory, or None to run every test of the suite"""

    # Error if the suite directory is missing
    if not os.path.isdir(suite_dir):
        utils.fatal_error("missing directory for test suite " + suite_dir)

    # Collect the test sub-directories
    test_list = []
    for file_name in os.listdir(suite_dir):
        if os.path.isdir(os.path.join(suite_dir, file_name)):
            test_list.append(file_name)

    # Error if there are no sub-directories
    if len(test_list) == 0:
        utils.fatal_error("no test dir is available in " + suite_dir)

    # Case of a specific test directory
    if test_dir_name is not None:
        evaluate_tool_on_test_dir(tool_exe_path, suite_dir, test_dir_name, **kwargs)

    # Case of all sub-directories
    else:
        for name in test_list:
            evaluate_tool_on_test_dir(tool_exe_path, suite_dir, name, **kwargs)
        # Global message
        suite_dir_name = utils.dir_name(suite_dir)
        tool_dir_name = utils.parent_dir_name(suite_dir, 1)
        action_name = "TEST"
        # Fix: compare against kht.ALIAS_CHECK; build_tool_exe_path never
        # returns "nul", so the COMPARISON label could never be printed
        if tool_exe_path == kht.ALIAS_CHECK:
            action_name = "COMPARISON"
        print(action_name + " DONE\t" + tool_dir_name + "\t" + suite_dir_name)
def evaluate_tool(tool_name, tool_exe_path, home_dir, test_suites, **kwargs):
    """Run the tests of a tool over a set of test suites
    Parameters:
    - tool_name: name of the tool, must belong to kht.TOOL_NAMES
    - tool_exe_path: path of the tool under test, or kht.ALIAS_CHECK to only compare results
    - home_dir: main directory of the source tree
    - test_suites: names of the suite directories to run
    - kwargs: optional command line arguments forwarded to the per-suite evaluation"""
    assert tool_name in kht.TOOL_NAMES
    assert utils.check_home_dir(home_dir)
    # Directory associated with the tool
    tool_dir_name = kht.TOOL_DIR_NAMES[tool_name]
    # Run the tests on the valid suite directories
    for suite_dir_name in test_suites:
        # Fix: the original recomputed the identical suite_dir a second time
        # inside the isdir branch; the redundant join is removed
        suite_dir = os.path.join(
            home_dir,
            tool_dir_name,
            suite_dir_name,
        )
        if os.path.isdir(suite_dir):
            print("\n\n--------------------------------------------------------")
            print("\tRunning " + tool_name + " " + suite_dir_name + " tests")
            print("--------------------------------------------------------")
            evaluate_tool_on_suite_dir(tool_exe_path, suite_dir, **kwargs)
+ - home_dir: repertoire principal de l'arborescence source + - tool_dir_name, suite_dir_name, test_dir_name: pour ne prendre en compte qu'une sous-partie + de l'arborescence source si ces operandes ne sont pas None + - binaries_dir: repertoire des executables des outils + - family: famille utilisee pour choisir la sous-partie des suites a tester + - kwargs: arguments optionnels de la ligne de commande + """ + # Tous les outils sont a prendre en compte si on est a la racine + if input_tool_dir_name is None: + used_tool_names = kht.TOOL_NAMES + # Sinon, seul l'outil correspondant au tool dir est a tester + else: + tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] + used_tool_names = [tool_name] + + # Parcours des repertoires des outils pour verifier les repertoires de suite et nettoyer les resultats + suite_errors = False + for tool_name in used_tool_names: + tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] + tool_exe_path, error_message = build_tool_exe_path(binaries_dir, tool_name) + # Recherche des suites a utiliser + if input_suite_dir_name is not None: + assert tool_dir_name is not None + suite_dir_names = [input_suite_dir_name] + elif family == test_families.ALL: + suite_dir_names = utils.sub_dirs(os.path.join(home_dir, tool_dir_name)) + else: + suite_dir_names = test_families.FAMILY_TEST_SUITES[family, tool_name] + # Parcours des suites de la famille + for suite_dir_name in suite_dir_names: + suite_dir = os.path.join( + home_dir, + tool_dir_name, + suite_dir_name, + ) + if os.path.isdir(suite_dir): + # Repertoires de test a utiliser + if input_test_dir_name is not None: + test_dir_names = [input_test_dir_name] + else: + test_dir_names = os.listdir(suite_dir) + # Parcours des repertoires de test de la suite + for test_dir_name in test_dir_names: + # Nettoyage sauf si seulement comparaisons des resultats + if tool_exe_path != kht.ALIAS_CHECK: + test_dir = os.path.join(suite_dir, test_dir_name) + if os.path.isdir(test_dir): + # Nettoyage uniquement si test
compatible avec les contraintes de temps + if results.is_results_ref_dir_time_selected( + test_dir, + kwargs["min_test_time"], + kwargs["max_test_time"], + ): + file_path = os.path.join( + test_dir, kht.COMPARISON_RESULTS_LOG + ) + if os.path.isfile(file_path): + utils.remove_file(file_path) + results_dir = os.path.join(test_dir, kht.RESULTS) + if os.path.isdir(results_dir): + for file_name in os.listdir(results_dir): + file_path = os.path.join(results_dir, file_name) + utils.remove_file(file_path) + utils.remove_dir(results_dir) + # Message d'erreur si suite inexistante + else: + if not suite_errors: + print("") + suite_errors = True + print("error : suite directory not found: " + suite_dir) + + # Cas d'un seul outil avec un repertoire de suite ou de test specifique + # Dans ce cas, on ignore la famille + if input_suite_dir_name is not None: + assert input_tool_dir_name is not None + tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] + tool_exe_path, error_message = build_tool_exe_path(binaries_dir, tool_name) + if tool_exe_path is None: + utils.fatal_error(error_message) + suite_dir = os.path.join(home_dir, input_tool_dir_name, input_suite_dir_name) + evaluate_tool_on_suite_dir( + tool_exe_path, + suite_dir, + input_test_dir_name, + **kwargs, + ) + # Cas d'un ou plusieurs outils, ou il faut utiliser les suites de la famille specifiee + else: + # Tous les outils sont a prendre en compte si on est a la racine + if input_tool_dir_name is None: + used_tool_names = kht.TOOL_NAMES + # Sinon, seul l'outil correspondant au tool dir est a tester + else: + tool_name = kht.TOOL_NAMES_PER_DIR_NAME[input_tool_dir_name] + used_tool_names = [tool_name] + + # Parcours des outils a evaluer + for tool_name in used_tool_names: + tool_dir_name = kht.TOOL_DIR_NAMES[tool_name] + if family == test_families.ALL: + test_suites = utils.sub_dirs(os.path.join(home_dir, tool_dir_name)) + else: + test_suites = test_families.FAMILY_TEST_SUITES[family, tool_name] + # On ne teste la
validite de l'exe que s'il y au moins une suite dans la famille + if len(test_suites) > 0: + # On sort avec un message d'erreur si l'exe n'esiste pas + # Cela n'est fait a priori sur tous les outils + # Cela permet de lancer un test complet sur une famille, meme si l'exe de KNI + # (exploite en dernier) n'est pas disponible + tool_exe_path, error_message = build_tool_exe_path( + binaries_dir, tool_name + ) + if tool_exe_path is None: + utils.fatal_error(error_message) + evaluate_tool( + tool_name, + tool_exe_path, + home_dir, + test_suites, + **kwargs, + ) + + +def main(): + """Fonction principale de lancement d'un test""" + + def build_usage_help( + help_command, + help_binary_dir, + help_tool_dir_name=None, + help_suite_dir_name=None, + help_test_dir_name=None, + help_options=None, + ): + """Construction d'une ligne d'aide pour un usage de la commande test""" + source_dir = os.path.join(".", kht.LEARNING_TEST) + if help_test_dir_name is not None: + source_dir = os.path.join( + source_dir, help_tool_dir_name, help_suite_dir_name, help_test_dir_name + ) + elif help_suite_dir_name is not None: + source_dir = os.path.join( + source_dir, help_tool_dir_name, help_suite_dir_name + ) + elif help_tool_dir_name is not None: + source_dir = os.path.join(source_dir, help_tool_dir_name) + usage_help = help_command + " " + source_dir + " " + help_binary_dir + if help_options is not None: + usage_help += " " + help_options + return usage_help + + # Nom du script + script_file_name = os.path.basename(__file__) + script_name = os.path.splitext(script_file_name)[0] + + # Ajout d'exemples d'utilisation + epilog = "" + epilog += "Usage examples" + epilog += "\n " + build_usage_help(script_name, "r", help_options="-p 4") + epilog += "\n " + build_usage_help( + script_name, + '"C:\\Program Files\\khiops\\bin"', + kht.TOOL_DIR_NAMES[kht.KHIOPS], + "Standard", + "Iris", + ) + epilog += "\n " + build_usage_help( + script_name, + "d", + kht.TOOL_DIR_NAMES[kht.COCLUSTERING], + 
help_options="--max-test-time 5 --test-timeout-limit 1000", + ) + epilog += "\n " + build_usage_help(script_name, "check", help_options="-f basic") + + # Parametrage de l'analyse de la ligne de commande + parser = argparse.ArgumentParser( + prog=script_name, + description="test a tool on a subset of test dirs", + epilog=epilog, + formatter_class=utils.get_formatter_class(script_name), + ) + + # Arguments positionnels + utils.argument_parser_add_source_argument(parser) + parser.add_argument( + "binaries", + help="tool binaries dir," + " or one of the following aliases:\n" + " r, d: release or debug binary dir in development environment\n" + " check: for comparison of test and reference results only\n", + ) + + # Arguments optionnels standards + utils.argument_parser_add_family_argument(parser) + utils.argument_parser_add_processes_argument(parser) + utils.argument_parser_add_forced_platform_argument(parser) + utils.argument_parser_add_limit_test_time_arguments(parser) + + # Temps de gestion d'un timeout + parser.add_argument( + "--test-timeout-limit", + help="kill overly long process exceeding timeout limit", + type=float, + metavar="t", + action="store", + ) + + # Mode avec fichier de tache + parser.add_argument( + "--task-file", + help="create a task progression file task_progression.log in results dir", + action="store_true", + ) + + # Mode avec scenario en sortie + parser.add_argument( + "--output-scenario", + help="create an output scenario output_test.prm in results dir", + action="store_true", + ) + + # Mode interface utilisateur + parser.add_argument( + "--user-interface", + help="run in user interface mode" + " (path to java and classpath with norm.jar must be defined)", + action="store_true", + ) + + # Analyse de la ligne de commande + args = parser.parse_args() + + # Verifications supplementaires des arguments + # On nomme les arguments concernes de la meme facon que pour le comportement par defaut + # des controles automatiques du parser + # Le rappel
des noms des arguments est redondant avec la definition des arguments ajoutes, + # mais ce n'est pas trop lourd a maintenir + # (il n'y a pas d'api officielle d'introspection de la classe argparse) + + # Verification de l'argument source + ( + home_dir, + tool_dir_name, + suite_dir_name, + test_dir_name, + ) = utils.argument_parser_check_source_argument(parser, args.source) + + # Verification des arguments optionnels + utils.argument_parser_check_processes_argument(parser, args.n) + utils.argument_parser_check_limit_test_time_arguments( + parser, args.min_test_time, args.max_test_time + ) + if args.test_timeout_limit is not None and args.test_timeout_limit < 0: + parser.error("argument --test-timeout-limit must be positive") + + # Echec si le nombre de processus est parametre et mpiexec n'est pas dans le path + if args.n > 1 and shutil.which(mpi_exe_name) is None: + parser.error( + "argument -p/--processes: process number " + + str(args.n) + + " is greater than 1 but mpiexec not found in path." 
+ ) + + # Echec si on est en mode interactif des elements de configuration minimaux sont absents + if args.user_interface: + # Pour l'instant, verification uniquement sous Windows + current_platform = results.get_context_platform_type() + if current_platform == "Windows": + # Verification de a presence de Java + # Ne suffit pas pour verifier que jvm.dll est dans le path, mais c'est deja ca + path_to_java = shutil.which("java.exe") + if path_to_java is None: + parser.error( + "argument --user-interface is set but Java not found in path" + ) + # Verification de la presence de norm.jar dans le classpath + classpath = os.getenv("classpath") + if classpath is None or "norm.jar" not in classpath: + parser.error( + "argument --user-interface is set but 'norm.jar' not found in classpath" + ) + + # Memorisation des variables globales de gestion du contexte des resultats de reference + results.process_number = args.n + results.forced_platform = args.forced_platform + + # Lancement de la commande + evaluate_all_tools_on_learning_test_tree( + home_dir, + tool_dir_name, + suite_dir_name, + test_dir_name, + args.binaries, + args.family, + min_test_time=args.min_test_time, + max_test_time=args.max_test_time, + test_timeout_limit=args.test_timeout_limit, + task_file=args.task_file, + output_scenario=args.output_scenario, + user_interface=args.user_interface, + ) + + +if __name__ == "__main__": + utils.set_flushed_outputs() + main()