Skip to content

Commit

Permalink
HDFS-17246. Fix shaded client for building Hadoop on Windows (#5943)
Browse files Browse the repository at this point in the history
  • Loading branch information
GauthamBanasandra authored Nov 1, 2023
1 parent cf3a4b3 commit cbb153b
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 36 deletions.
67 changes: 40 additions & 27 deletions dev-support/bin/hadoop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,20 @@
personality_plugins "all,-ant,-gradle,-scalac,-scaladoc"

# These flags are needed to run Yetus against Hadoop on Windows.
WINDOWS_FLAGS="-Pnative-win
-Dhttps.protocols=TLSv1.2
-Drequire.openssl
-Drequire.test.libhadoop
-Dshell-executable=${BASH_EXECUTABLE}
-Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}
-Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}
-Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}
-Dwindows.cmake.build.type=RelWithDebInfo
-Dwindows.build.hdfspp.dll=off
-Dwindows.no.sasl=on
-Duse.platformToolsetVersion=v142"
WINDOWS_FLAGS=(
"-Pnative-win"
"-Dhttps.protocols=TLSv1.2"
"-Drequire.openssl"
"-Drequire.test.libhadoop"
"-Dshell-executable=${BASH_EXECUTABLE}"
"-Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}"
"-Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}"
"-Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}"
"-Dwindows.cmake.build.type=RelWithDebInfo"
"-Dwindows.build.hdfspp.dll=off"
"-Dwindows.no.sasl=on"
"-Duse.platformToolsetVersion=v142"
)

## @description Globals specific to this personality
## @audience private
Expand Down Expand Up @@ -292,7 +294,7 @@ function hadoop_native_flags
-Drequire.snappy \
-Pdist \
-Dtar \
"${WINDOWS_FLAGS}"
"${WINDOWS_FLAGS[@]}"
;;
*)
echo \
Expand Down Expand Up @@ -436,7 +438,7 @@ function personality_modules
fi

if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS} ${extra}"
extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS[*]} ${extra}"
fi

for module in $(hadoop_order ${ordering}); do
Expand Down Expand Up @@ -557,14 +559,6 @@ function shadedclient_rebuild
declare module
declare -a modules=()

if [[ ${OSTYPE} = Windows_NT ||
${OSTYPE} =~ ^CYGWIN.* ||
${OSTYPE} =~ ^MINGW32.* ||
${OSTYPE} =~ ^MSYS.* ]]; then
echo "hadoop personality: building on windows, skipping check of client artifacts."
return 0
fi

yetus_debug "hadoop personality: seeing if we need the test of client artifacts."
for module in hadoop-client-modules/hadoop-client-check-invariants \
hadoop-client-modules/hadoop-client-check-test-invariants \
Expand All @@ -581,28 +575,47 @@ function shadedclient_rebuild

big_console_header "Checking client artifacts on ${repostatus} with shaded clients"

extra="-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true"
extra=(
"-Dtest=NoUnitTests"
"-Dmaven.javadoc.skip=true"
"-Dcheckstyle.skip=true"
"-Dspotbugs.skip=true"
)

if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
# shellcheck disable=SC2206
extra+=(${WINDOWS_FLAGS[*]})

# The shaded client integration tests require the Hadoop jars that were just built to be
# installed in the local Maven repository.
# shellcheck disable=SC2086
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" install -fae --batch-mode \
-DskipTests -DskipDocs -Pdist -Dtar ${extra[*]}

# The shaded client integration tests spawn a MiniDFS and MiniYARN cluster for testing. Both of
# them require winutils.exe to be found in the PATH and HADOOP_HOME to be set.
if load_hadoop_version; then
export HADOOP_HOME="${SOURCEDIR}/hadoop-dist/target/hadoop-${HADOOP_VERSION}-SNAPSHOT"
WIN_HADOOP_HOME=$(cygpath -w -a "${HADOOP_HOME}")
export PATH="${PATH};${WIN_HADOOP_HOME}\bin"
else
yetus_error "[WARNING] Unable to extract the Hadoop version and thus HADOOP_HOME is not set. Some tests may fail."
fi

extra="${WINDOWS_FLAGS} ${extra}"
fi

# shellcheck disable=SC2086
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" "${extra}"
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" ${extra[*]}

big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients"

# shellcheck disable=SC2086
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
"${modules[@]}" \
-DskipShade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true \
-Dspotbugs.skip=true "${extra}"
-Dspotbugs.skip=true ${extra[*]}

count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
if [[ ${count} -gt 0 ]]; then
Expand Down
32 changes: 24 additions & 8 deletions dev-support/docker/Dockerfile_windows_10
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ RUN curl -SL --output vs_buildtools.exe https://aka.ms/vs/16/release/vs_buildtoo
&& del /q vs_buildtools.exe

# Install Chocolatey.
ENV chocolateyVersion=1.4.0
RUN powershell -NoProfile -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"

# Install git.
RUN choco install git.install -y
Expand All @@ -55,24 +55,18 @@ RUN powershell .\vcpkg\vcpkg.exe install boost:x64-windows
RUN powershell .\vcpkg\vcpkg.exe install protobuf:x64-windows
RUN powershell .\vcpkg\vcpkg.exe install openssl:x64-windows
RUN powershell .\vcpkg\vcpkg.exe install zlib:x64-windows
ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"

# Install Azul Java 8 JDK.
RUN powershell Invoke-WebRequest -URI https://cdn.azul.com/zulu/bin/zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -OutFile $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip
RUN powershell Expand-Archive -Path $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -DestinationPath "C:\Java"
ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
RUN setx PATH "%PATH%;%JAVA_HOME%\bin"

# Install Apache Maven.
RUN powershell Invoke-WebRequest -URI https://archive.apache.org/dist/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.zip -OutFile $Env:TEMP\apache-maven-3.8.6-bin.zip
RUN powershell Expand-Archive -Path $Env:TEMP\apache-maven-3.8.6-bin.zip -DestinationPath "C:\Maven"
RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
ENV MAVEN_OPTS '-Xmx2048M -Xss128M'

# Install CMake 3.19.0.
RUN powershell Invoke-WebRequest -URI https://cmake.org/files/v3.19/cmake-3.19.0-win64-x64.zip -OutFile $Env:TEMP\cmake-3.19.0-win64-x64.zip
RUN powershell Expand-Archive -Path $Env:TEMP\cmake-3.19.0-win64-x64.zip -DestinationPath "C:\CMake"
RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"

# Install zstd 1.5.4.
RUN powershell Invoke-WebRequest -Uri https://github.com/facebook/zstd/releases/download/v1.5.4/zstd-v1.5.4-win64.zip -OutFile $Env:TEMP\zstd-v1.5.4-win64.zip
Expand Down Expand Up @@ -112,13 +106,35 @@ RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` Fi
RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip
RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3"
RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe"

# Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows.
RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires

# Grant the privilege to create symbolic links to HadoopBuilder.
RUN powershell secedit /export /cfg "C:\secpol.cfg"
RUN powershell "(Get-Content C:\secpol.cfg).Replace('SeCreateSymbolicLinkPrivilege = ', 'SeCreateSymbolicLinkPrivilege = HadoopBuilder,') | Out-File C:\secpol.cfg"
RUN powershell secedit /configure /db "C:\windows\security\local.sdb" /cfg "C:\secpol.cfg"
RUN powershell Remove-Item -Force "C:\secpol.cfg" -Confirm:$false

# Log in as HadoopBuilder and set the necessary environment and PATH variables.
USER HadoopBuilder
ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
RUN setx PATH "%PATH%;C:\ZStd"
RUN setx path "%PATH%;C:\Python3"
RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"

# We get strange Javadoc errors without this.
RUN setx classpath ""

# Setting Git configurations.
RUN git config --global core.autocrlf true
RUN git config --global core.longpaths true
RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"

# Define the entry point for the docker container.
ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat", "&&", "cmd.exe"]
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.ChunkedArrayList;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -661,7 +662,8 @@ public static boolean isValidName(String src) {
for (int i = 0; i < components.length; i++) {
String element = components[i];
if (element.equals(".") ||
(element.contains(":")) ||
// On Windows, we must allow the ':' that appears in the drive letter (e.g. "C:").
(!Shell.WINDOWS && i == 1 && element.contains(":")) ||
(element.contains("/"))) {
return false;
}
Expand Down

0 comments on commit cbb153b

Please sign in to comment.