From 578b850ad846efadeb5e2f43557f5d7652b2539b Mon Sep 17 00:00:00 2001
From: Gautham B A
Date: Wed, 1 Nov 2023 21:40:15 +0530
Subject: [PATCH] HDFS-17246. Fix shaded client for building Hadoop on Windows (#5943)

---
Notes (ignored by git-am, not part of the commit message):

 * This copy deviates from the upstream commit named in the From line in two
   places, so the post-image blob ids on the "index" lines no longer match:
   - dev-support/bin/hadoop.sh (shadedclient_rebuild): the Maven flag arrays
     are expanded as "${array[@]}" so that a flag whose value contains
     whitespace stays a single argument.
   - DFSUtilClient.isValidName(): ':' is rejected in every path component
     except component 1 on Windows (the drive letter). The upstream condition
     only rejected ':' in component 1 on non-Windows hosts.
 * Leading whitespace of the diff lines was reconstructed. If a hunk fails to
   match, apply with --ignore-whitespace.

 dev-support/bin/hadoop.sh                     | 67 +++++++++++--------
 dev-support/docker/Dockerfile_windows_10      | 32 ++++++---
 .../org/apache/hadoop/hdfs/DFSUtilClient.java |  4 +-
 3 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/dev-support/bin/hadoop.sh b/dev-support/bin/hadoop.sh
index b51a9525bd89e..fd9363e66bbd7 100755
--- a/dev-support/bin/hadoop.sh
+++ b/dev-support/bin/hadoop.sh
@@ -21,18 +21,20 @@
 personality_plugins "all,-ant,-gradle,-scalac,-scaladoc"
 
 # These flags are needed to run Yetus against Hadoop on Windows.
-WINDOWS_FLAGS="-Pnative-win
-  -Dhttps.protocols=TLSv1.2
-  -Drequire.openssl
-  -Drequire.test.libhadoop
-  -Dshell-executable=${BASH_EXECUTABLE}
-  -Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}
-  -Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}
-  -Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}
-  -Dwindows.cmake.build.type=RelWithDebInfo
-  -Dwindows.build.hdfspp.dll=off
-  -Dwindows.no.sasl=on
-  -Duse.platformToolsetVersion=v142"
+WINDOWS_FLAGS=(
+  "-Pnative-win"
+  "-Dhttps.protocols=TLSv1.2"
+  "-Drequire.openssl"
+  "-Drequire.test.libhadoop"
+  "-Dshell-executable=${BASH_EXECUTABLE}"
+  "-Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}"
+  "-Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}"
+  "-Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}"
+  "-Dwindows.cmake.build.type=RelWithDebInfo"
+  "-Dwindows.build.hdfspp.dll=off"
+  "-Dwindows.no.sasl=on"
+  "-Duse.platformToolsetVersion=v142"
+)
 
 ## @description Globals specific to this personality
 ## @audience private
@@ -292,7 +294,7 @@ function hadoop_native_flags
         -Drequire.snappy \
         -Pdist \
         -Dtar \
-        "${WINDOWS_FLAGS}"
+        "${WINDOWS_FLAGS[@]}"
     ;;
     *)
       echo \
@@ -436,7 +438,7 @@ function personality_modules
   fi
 
   if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
-    extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS} ${extra}"
+    extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS[*]} ${extra}"
   fi
 
   for module in $(hadoop_order ${ordering}); do
@@ -557,14 +559,6 @@ function shadedclient_rebuild
   declare module
   declare -a modules=()
 
-  if [[ ${OSTYPE} = Windows_NT ||
-        ${OSTYPE} =~ ^CYGWIN.* ||
-        ${OSTYPE} =~ ^MINGW32.* ||
-        ${OSTYPE} =~ ^MSYS.* ]]; then
-    echo "hadoop personality: building on windows, skipping check of client artifacts."
-    return 0
-  fi
-
   yetus_debug "hadoop personality: seeing if we need the test of client artifacts."
   for module in hadoop-client-modules/hadoop-client-check-invariants \
                 hadoop-client-modules/hadoop-client-check-test-invariants \
@@ -581,28 +575,47 @@ function shadedclient_rebuild
 
   big_console_header "Checking client artifacts on ${repostatus} with shaded clients"
 
-  extra="-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true"
+  extra=(
+    "-Dtest=NoUnitTests"
+    "-Dmaven.javadoc.skip=true"
+    "-Dcheckstyle.skip=true"
+    "-Dspotbugs.skip=true"
+  )
 
   if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
+    # Append every Windows flag as its own array element, without re-splitting it.
+    extra+=("${WINDOWS_FLAGS[@]}")
+
+    # The shaded client integration tests require the Hadoop jars that were just built to be
+    # installed in the local maven repository.
+    # The quoted [@] expansion passes each flag in extra as exactly one argument.
+    echo_and_redirect "${logfile}" \
+      "${MAVEN}" "${MAVEN_ARGS[@]}" install -fae --batch-mode \
+      -DskipTests -DskipDocs -Pdist -Dtar "${extra[@]}"
+
+    # The shaded client integration tests spawn a MiniDFS and MiniYARN cluster for testing. Both of
+    # them require winutils.exe to be found in the PATH and HADOOP_HOME to be set.
     if load_hadoop_version; then
       export HADOOP_HOME="${SOURCEDIR}/hadoop-dist/target/hadoop-${HADOOP_VERSION}-SNAPSHOT"
+      WIN_HADOOP_HOME=$(cygpath -w -a "${HADOOP_HOME}")
+      export PATH="${PATH};${WIN_HADOOP_HOME}\bin"
     else
       yetus_error "[WARNING] Unable to extract the Hadoop version and thus HADOOP_HOME is not set. Some tests may fail."
     fi
-
-    extra="${WINDOWS_FLAGS} ${extra}"
   fi
 
+  # The quoted [@] expansion passes each flag in extra as exactly one argument.
   echo_and_redirect "${logfile}" \
-    "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" "${extra}"
+    "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" "${extra[@]}"
 
   big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients"
 
+  # The quoted [@] expansion passes each flag in extra as exactly one argument.
   echo_and_redirect "${logfile}" \
     "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
       "${modules[@]}" \
       -DskipShade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true \
-      -Dspotbugs.skip=true "${extra}"
+      -Dspotbugs.skip=true "${extra[@]}"
 
   count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
   if [[ ${count} -gt 0 ]]; then
diff --git a/dev-support/docker/Dockerfile_windows_10 b/dev-support/docker/Dockerfile_windows_10
index 20cad3f56d6e3..105529c5d65b2 100644
--- a/dev-support/docker/Dockerfile_windows_10
+++ b/dev-support/docker/Dockerfile_windows_10
@@ -38,8 +38,8 @@ RUN curl -SL --output vs_buildtools.exe https://aka.ms/vs/16/release/vs_buildtoo
     && del /q vs_buildtools.exe
 
 # Install Chocolatey.
+ENV chocolateyVersion=1.4.0
 RUN powershell -NoProfile -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
-RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
 
 # Install git.
 RUN choco install git.install -y
@@ -55,24 +55,18 @@ RUN powershell .\vcpkg\vcpkg.exe install boost:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install protobuf:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install openssl:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install zlib:x64-windows
-ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
 
 # Install Azul Java 8 JDK.
 RUN powershell Invoke-WebRequest -URI https://cdn.azul.com/zulu/bin/zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -OutFile $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -DestinationPath "C:\Java"
-ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
-RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
 
 # Install Apache Maven.
 RUN powershell Invoke-WebRequest -URI https://archive.apache.org/dist/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.zip -OutFile $Env:TEMP\apache-maven-3.8.6-bin.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\apache-maven-3.8.6-bin.zip -DestinationPath "C:\Maven"
-RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
-ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
 
 # Install CMake 3.19.0.
 RUN powershell Invoke-WebRequest -URI https://cmake.org/files/v3.19/cmake-3.19.0-win64-x64.zip -OutFile $Env:TEMP\cmake-3.19.0-win64-x64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\cmake-3.19.0-win64-x64.zip -DestinationPath "C:\CMake"
-RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
 
 # Install zstd 1.5.4.
 RUN powershell Invoke-WebRequest -Uri https://github.com/facebook/zstd/releases/download/v1.5.4/zstd-v1.5.4-win64.zip -OutFile $Env:TEMP\zstd-v1.5.4-win64.zip
@@ -112,13 +106,35 @@ RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` Fi
 RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3"
 RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe"
+
+# Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows.
+RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires
+
+# Grant the privilege to create symbolic links to HadoopBuilder.
+RUN powershell secedit /export /cfg "C:\secpol.cfg"
+RUN powershell "(Get-Content C:\secpol.cfg).Replace('SeCreateSymbolicLinkPrivilege = ', 'SeCreateSymbolicLinkPrivilege = HadoopBuilder,') | Out-File C:\secpol.cfg"
+RUN powershell secedit /configure /db "C:\windows\security\local.sdb" /cfg "C:\secpol.cfg"
+RUN powershell Remove-Item -Force "C:\secpol.cfg" -Confirm:$false
+
+# Login as HadoopBuilder and set the necessary environment and PATH variables.
+USER HadoopBuilder
+ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
+ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
+ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
+RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
+RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
+RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
+RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
+RUN setx PATH "%PATH%;C:\ZStd"
 RUN setx path "%PATH%;C:\Python3"
+RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
 
 # We get strange Javadoc errors without this.
 RUN setx classpath ""
 
+# Setting Git configurations.
+RUN git config --global core.autocrlf true
 RUN git config --global core.longpaths true
-RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
 
 # Define the entry point for the docker container.
 ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat", "&&", "cmd.exe"]
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
index dbcee7492f06d..71cff2e3915b0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
@@ -63,6 +63,7 @@
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Daemon;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -661,7 +662,8 @@ public static boolean isValidName(String src) {
     for (int i = 0; i < components.length; i++) {
       String element = components[i];
       if (element.equals(".") ||
-          (element.contains(":")) ||
+          // For Windows, we must allow the : in the drive letter.
+          (!(Shell.WINDOWS && i == 1) && element.contains(":")) ||
           (element.contains("/"))) {
         return false;
       }