From 7125ca197e9f6d889e586f8bc420bc6a5f549b20 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 5 Oct 2023 14:48:44 +0200 Subject: [PATCH] GH-37923: [R] Move macOS build system to nixlibs.R (#37684) This PR modifies the build system of the R package to no longer rely on auto/homebrew. Instead this PR adds the infrastructure and code paths to use the same type of pre-compiled libarrow binaries as we use for Linux. The main difference is the use of the binaries even on CRAN (as we previously also used binaries in form of brew bottles). The addition of the new artifacts to tasks.yml should ensure that they get uploaded to the nightly repo as well as to the artifactory during the release (@ kou please confirm). A summary of the changes in this PR: - update `r/configure` and `r/tools/nixlibs.R` to enable the source build on macOS and usage of precompiled binaries using the existing mechanism to test compile a program to detect the existing openssl version - added tests for the changes in nixlibs.R - update the binary allow-list - Add the build jobs for libarrow binaries for arm64 and x86_64 macos with openssl 1.1 and 3.0 to the `r-binary-packages` job - Use the binaries to build the nightly packages - bump snappy version to 1.1.10 (and patch it on 10.13) due to build issues with the current version. 
This also touches on a number of issues in regards to a sanitizer issue we have had for a long time: #32562 #31766 - Disable the centos binary test step: #37922 Follow up issues: - #37921 - #37941 - #37945 * Closes: #37923 Lead-authored-by: Jacob Wujciak-Jens Co-authored-by: Jonathan Keane Co-authored-by: Sutou Kouhei Signed-off-by: Dewey Dunnington --- cpp/Brewfile | 6 ++ cpp/cmake_modules/SetupCxxFlags.cmake | 15 +++- cpp/cmake_modules/ThirdpartyToolchain.cmake | 21 ++++++ cpp/cmake_modules/snappy.diff | 12 +++ cpp/thirdparty/versions.txt | 5 +- dev/release/rat_exclude_files.txt | 1 + dev/tasks/macros.jinja | 14 +++- dev/tasks/r/github.packages.yml | 83 +++++++++++++++++--- dev/tasks/tasks.yml | 4 + r/configure | 55 ++------------ r/tools/nixlibs-allowlist.txt | 1 + r/tools/nixlibs.R | 84 ++++++++++++++++----- r/tools/test-nixlibs.R | 64 +++++++++++++--- 13 files changed, 268 insertions(+), 97 deletions(-) create mode 100644 cpp/cmake_modules/snappy.diff diff --git a/cpp/Brewfile b/cpp/Brewfile index 58015d2121b5b..0f5527985364c 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -19,7 +19,9 @@ brew "aws-sdk-cpp" brew "bash" brew "boost" brew "brotli" +brew "bzip2" brew "c-ares" +brew "curl" brew "ccache" brew "cmake" brew "flatbuffers" @@ -29,14 +31,18 @@ brew "googletest" brew "grpc" brew "llvm@14" brew "lz4" +brew "mimalloc" brew "ninja" brew "node" brew "openssl@3" +brew "pkg-config" brew "protobuf" brew "python" brew "rapidjson" +brew "re2" brew "snappy" brew "thrift" +brew "utf8proc" brew "wget" brew "xsimd" brew "zstd" diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 5531415ac2277..e90c00612caa3 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE # Don't complain about optimization passes that were not possible set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} 
-Wno-pass-failed") - # Avoid clang / libc++ error about C++17 aligned allocation on macOS. - # See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0 - # for details. if(APPLE) - set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new") + # Avoid clang / libc++ error about C++17 aligned allocation on macOS. + # See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0 + # for details. + string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new") + + if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) + # Avoid C++17 std::get 'not available' issue on macOS 10.13 + # This will be required until at least R 4.4 is released and + # CRAN (hopefully) stops checking on 10.13 + string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY") + endif() endif() endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 85c0337d108be..6d6a2bf7751ee 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1308,6 +1308,26 @@ macro(build_snappy) set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + # Snappy unconditionally enables Werror when building with clang; this can lead + # to build failures by way of new compiler warnings. This adds a flag to disable + # Werror to the very end of the invocation to override the snappy internal setting. 
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + list(APPEND + SNAPPY_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS_${CONFIG}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error" + ) + endforeach() + endif() + + if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) + # On macOS 10.13 we need to explicitly add <functional> to avoid a missing include error + # This can be removed once CRAN no longer checks on macOS 10.13 + find_program(PATCH patch REQUIRED) + set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/snappy.diff) + else() + set(SNAPPY_PATCH_COMMAND) + endif() externalproject_add(snappy_ep ${EP_COMMON_OPTIONS} @@ -1315,6 +1335,7 @@ macro(build_snappy) INSTALL_DIR ${SNAPPY_PREFIX} URL ${SNAPPY_SOURCE_URL} URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${SNAPPY_PATCH_COMMAND} CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") diff --git a/cpp/cmake_modules/snappy.diff b/cpp/cmake_modules/snappy.diff new file mode 100644 index 0000000000000..f86e2bb19780c --- /dev/null +++ b/cpp/cmake_modules/snappy.diff @@ -0,0 +1,12 @@ +diff --git a/snappy.cc b/snappy.cc +index d414718..5b0d0d6 100644 +--- a/snappy.cc ++++ b/snappy.cc +@@ -83,6 +83,7 @@ + #include <string> + #include <utility> + #include <vector> ++#include <functional> + + namespace snappy { + diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 52d302592b55c..56163f35d2499 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -101,9 +101,8 @@ ARROW_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709 ARROW_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806 ARROW_RE2_BUILD_VERSION=2022-06-01 ARROW_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f -# 1.1.9 is patched to implement https://github.com/google/snappy/pull/148 if this is bumped, remove the patch -ARROW_SNAPPY_BUILD_VERSION=1.1.9 
-ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8b4f1c962b478b6e06e7 +ARROW_SNAPPY_BUILD_VERSION=1.1.10 +ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90 ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0 ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628 ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index aebe321d613ab..af084ea215621 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -24,6 +24,7 @@ cpp/build-support/iwyu/* cpp/cmake_modules/FindPythonLibsNew.cmake cpp/cmake_modules/SnappyCMakeLists.txt cpp/cmake_modules/SnappyConfig.h +cpp/cmake_modules/snappy.diff cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake cpp/src/parquet/.parquetcppversion cpp/src/generated/parquet_constants.cpp diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index faf77a1168d1b..54f676fd49cd9 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -307,7 +307,8 @@ on: stopifnot(packageVersion("arrow") == {{ '"${{needs.source.outputs.pkg_version}}"' }}) {% endmacro %} -{%- macro github_setup_local_r_repo(get_nix, get_win) -%} +{%- macro github_setup_local_r_repo(get_nix, get_win, get_mac=False) -%} +# TODO: improve arg handling - name: Setup local repo shell: bash run: mkdir repo @@ -327,6 +328,17 @@ on: path: repo/libarrow/bin/linux-openssl-{{ openssl_version }} {% endfor %} {% endif %} + {% if get_mac %} + {% for openssl_version in ["1.1", "3.0"] %} + {% for arch in ["x86_64", "arm64"] %} + - name: Get macOS {{ arch }} OpenSSL {{ openssl_version }} binary + uses: actions/download-artifact@v3 + with: + name: r-lib__libarrow__bin__darwin-{{arch}}-openssl-{{ openssl_version }} + path: repo/libarrow/bin/darwin-{{ arch }}-openssl-{{ openssl_version }} + {% endfor %} + {% endfor %} + {% endif %} - name: Get src pkg uses: 
actions/download-artifact@v3 with: diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index dbe21ffb6b160..760e3b6da448d 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -56,6 +56,59 @@ jobs: name: r-pkg__src__contrib path: arrow/r/arrow_*.tar.gz + macos-cpp: + name: C++ Binary macOS OpenSSL {{ '${{ matrix.openssl }}' }} {{ '${{ matrix.platform.arch }}' }} + + runs-on: {{ '${{ matrix.platform.runs_on }}' }} + + needs: source + strategy: + fail-fast: false + matrix: + platform: + - { runs_on: ["self-hosted", "macos-10.13"], arch: "x86_64" } + + - { runs_on: ["self-hosted", "macOS", "arm64", "devops-managed"], arch: "arm64" } + openssl: ['3.0', '1.1'] + + steps: + {{ macros.github_checkout_arrow(action_v="3")|indent }} + {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.pkg_version }}')|indent }} + - name: Install Deps + if: {{ "${{ !contains(matrix.platform.runs_on, 'macos-10.13') }}" }} + run: | + brew install sccache ninja + brew install openssl@{{ '${{ matrix.openssl }}' }} + - name: Build libarrow + shell: bash + env: + {{ macros.github_set_sccache_envvars()|indent(8) }} + MACOSX_DEPLOYMENT_TARGET: "10.13" + ARROW_S3: ON + ARROW_GCS: ON + ARROW_DEPENDENCY_SOURCE: BUNDLED + CMAKE_GENERATOR: Ninja + LIBARROW_MINIMAL: false + run: | + sccache --start-server + export EXTRA_CMAKE_FLAGS="-DOPENSSL_ROOT_DIR=$(brew --prefix openssl@{{ '${{ matrix.openssl }}' }})" + cd arrow + r/inst/build_arrow_static.sh + - name: Bundle libarrow + shell: bash + env: + PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip + VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }} + run: | + cd arrow/r/libarrow/dist + zip -r $PKG_FILE lib/ include/ + + - name: Upload binary artifact + uses: actions/upload-artifact@v3 + with: + name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' }}-openssl-{{ '${{ matrix.openssl }}' }} + path: arrow/r/libarrow/dist/arrow-*.zip + linux-cpp: 
name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }} runs-on: ubuntu-latest @@ -135,7 +188,7 @@ jobs: path: build/arrow-*.zip r-packages: - needs: [source, windows-cpp] + needs: [source, windows-cpp, macos-cpp] name: {{ '${{ matrix.platform.name }} ${{ matrix.r_version.r }}' }} runs-on: {{ '${{ matrix.platform.runs_on }}' }} strategy: @@ -167,7 +220,7 @@ jobs: rig system setup-user-lib rig system add-pak - {{ macros.github_setup_local_r_repo(false, true)|indent }} + {{ macros.github_setup_local_r_repo(false, true, true)|indent }} - name: Prepare Dependency Installation shell: bash @@ -178,18 +231,19 @@ jobs: with: working-directory: 'arrow' extra-packages: cpp11 - - name: Install sccache - if: startsWith(matrix.platform, 'macos') - run: brew install sccache + - name: Set CRAN like openssl + if: contains(matrix.platform.runs_on, 'arm64') + run: | + # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the + # default setting of the brew --prefix as root dir to avoid version conflicts. 
+ echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV - name: Build Binary id: build shell: Rscript {0} env: - NOT_CRAN: "true" # actions/setup-r sets this implicitly + NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" - FORCE_AUTOBREW: "true" # this is ignored on windows - # sccache for macos - {{ macros.github_set_sccache_envvars()|indent(8) }} + LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated run: | on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" @@ -213,8 +267,10 @@ jobs: INSTALL_opts = INSTALL_opts ) + # Test library(arrow) + arrow_info() read_parquet(system.file("v0.7.1.parquet", package = "arrow")) # encode contrib.url for artifact name @@ -233,7 +289,6 @@ jobs: with: name: r-pkg{{ '${{ steps.build.outputs.path }}' }} path: arrow_* - test-linux-binary: needs: [source, linux-cpp] name: Test binary {{ '${{ matrix.config.image }}' }} @@ -291,7 +346,10 @@ jobs: with: name: r-pkg_centos7 path: arrow_* + test-centos-binary: + # arrow binary package not on ppm currently see #37922 + if: false needs: test-linux-binary runs-on: ubuntu-latest container: "rstudio/r-base:4.2-centos7" @@ -317,7 +375,8 @@ jobs: read_parquet(system.file("v0.7.1.parquet", package = "arrow")) print(arrow_info()) - test-source: + #TODO test macos source build? + test-linux-source: needs: source name: Test linux source build runs-on: ubuntu-latest @@ -367,7 +426,7 @@ jobs: upload-binaries: # Only upload binaries if all tests pass. 
- needs: [r-packages, test-source, test-linux-binary, test-centos-binary] + needs: [r-packages, test-linux-source, test-linux-binary] name: Upload artifacts runs-on: ubuntu-latest steps: diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 859ff8ddb5b44..798932db23323 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -994,6 +994,10 @@ tasks: - r-lib__libarrow__bin__linux-openssl-1.0__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__linux-openssl-1.1__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__linux-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-arm64-openssl-1.1__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.1__arrow_{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip - r-pkg__bin__macosx__contrib__4.1__arrow_{no_rc_r_version}\.tgz diff --git a/r/configure b/r/configure index 593f60bbddad9..d244b1a7c148f 100755 --- a/r/configure +++ b/r/configure @@ -39,8 +39,8 @@ # # * Installing a released version from source, as from CRAN, with # no other prior setup -# * On macOS, autobrew is used to retrieve libarrow and dependencies -# * On Linux, the nixlibs.R build script will download or build +# * On macOS and Linux, the nixlibs.R build script will download +# or build libarrow and dependencies # * Installing a released version but first installing libarrow. # It will use pkg-config and brew to search for libraries. # * Installing a development version from source as a user. 
@@ -65,8 +65,6 @@ PKG_TEST_HEADER="" # Some env vars that control the build (all logical, case insensitive) # Development mode, also increases verbosity in the bundled build ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` -# autobrew is how mac binaries are built on CRAN; FORCE ensures we use it here -FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'` # The bundled build compiles arrow C++ from source; FORCE ensures we don't pick up # any other packages that may be found on the system FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'` @@ -141,19 +139,11 @@ fi find_or_build_libarrow () { if [ "$FORCE_BUNDLED_BUILD" = "true" ]; then do_bundled_build - elif [ "$FORCE_AUTOBREW" = "true" ]; then - do_autobrew else find_arrow if [ "$_LIBARROW_FOUND" = "false" ]; then # If we haven't found a suitable version of libarrow, build it - if [ "$UNAME" = "Darwin" ] && ! echo $VERSION | grep -q "000"; then - # Only autobrew on release version (for testing, use FORCE_AUTOBREW above) - # (dev versions end in .9000, and nightly gets something like .10000xxx) - do_autobrew - else - do_bundled_build - fi + do_bundled_build fi fi } @@ -175,12 +165,6 @@ find_arrow () { # 2. Use pkg-config to find arrow on the system _LIBARROW_FOUND="`${PKG_CONFIG} --variable=prefix --silence-errors ${PKG_CONFIG_NAME}`" echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND" - elif brew --prefix ${PKG_BREW_NAME} > /dev/null 2>&1; then - # 3. 
On macOS, look for Homebrew apache-arrow - # (note that if you have pkg-config, homebrew arrow may have already been found) - _LIBARROW_FOUND=`brew --prefix ${PKG_BREW_NAME}` - echo "*** Trying Arrow C++ found by Homebrew: ${_LIBARROW_FOUND}" - export PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" else _LIBARROW_FOUND="false" fi @@ -247,34 +231,6 @@ do_bundled_build () { fi } -do_autobrew () { - echo "*** Downloading ${PKG_BREW_NAME}" - - # Setup for local autobrew testing - if [ -f "tools/apache-arrow.rb" ]; then - # If you want to use a local apache-arrow.rb formula, do - # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools - # before R CMD build or INSTALL (assuming a local checkout of the apache/arrow repository). - # If you have this, you should use the local autobrew script so they match. - cp tools/autobrew . - fi - - if [ -f "autobrew" ]; then - echo "**** Using local manifest for ${PKG_BREW_NAME}" - else - if ! curl -sfL "https://autobrew.github.io/scripts/$PKG_BREW_NAME" > autobrew; then - echo "Failed to download manifest for ${PKG_BREW_NAME}" - # Fall back to the local copy - cp tools/autobrew . - fi - fi - if ! . 
autobrew; then - echo "Failed to retrieve binary for ${PKG_BREW_NAME}" - fi - # autobrew sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` - # TODO: move PKG_LIBS and PKG_CFLAGS out of autobrew and use set_pkg_vars -} - # Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` # either from pkg-config or by inferring things about the directory in $1 set_pkg_vars () { @@ -298,6 +254,11 @@ set_pkg_vars () { if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi + + if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13'; then + # avoid C++17 availability warnings on macOS < 11 + PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" + fi } # If we have pkg-config, it will tell us what libarrow needs diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt index bd9f0c1b2c084..9c368e6ed15a2 100644 --- a/r/tools/nixlibs-allowlist.txt +++ b/r/tools/nixlibs-allowlist.txt @@ -2,3 +2,4 @@ ubuntu centos redhat rhel +darwin diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 3d908c05cab07..60deca05cd172 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -28,6 +28,8 @@ if (test_mode && is.na(VERSION)) { } dev_version <- package_version(VERSION)[1, 4] +on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" + # Small dev versions are added for R-only changes during CRAN submission. 
if (is.na(dev_version) || dev_version < "100") { @@ -94,8 +96,10 @@ download_binary <- function(lib) { } } else { if (!quietly) { - cat(sprintf("*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", - VERSION, lib, binary_url)) + cat(sprintf( + "*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", + VERSION, lib, binary_url + )) } libfile <- NULL } @@ -114,6 +118,10 @@ download_binary <- function(lib) { # * "linux-openssl-1.0" (OpenSSL 1.0) # * "linux-openssl-1.1" (OpenSSL 1.1) # * "linux-openssl-3.0" (OpenSSL 3.0) +# * "darwin-x86_64-openssl-1.1" (OpenSSL 1.1) +# * "darwin-x86_64-openssl-3.0" (OpenSSL 3.0) +# * "darwin-arm64-openssl-1.1" (OpenSSL 1.1) +# * "darwin-arm64-openssl-3.0" (OpenSSL 3.0) # These string values, along with `NULL`, are the potential return values of # this function. identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) { @@ -142,7 +150,7 @@ check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apa # Try a remote allowlist so that we can add/remove without a release suppressWarnings(readLines(allowed)), # Fallback to default: allowed only on Ubuntu and CentOS/RHEL - error = function(e) c("ubuntu", "centos", "redhat", "rhel") + error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin") ) # allowlist should contain valid regular expressions (plain strings ok too) any(grepl(paste(allowlist, collapse = "|"), os)) @@ -151,14 +159,16 @@ select_binary <- function(os = tolower(Sys.info()[["sysname"]]), arch = tolower(Sys.info()[["machine"]]), test_program = test_for_curl_and_openssl) { - if (identical(os, "linux") && identical(arch, "x86_64")) { - # We only host x86 linux binaries today - tryCatch( + if (identical(os, "darwin") || (identical(os, "linux") && identical(arch, "x86_64"))) { + # We only host x86 linux binaries and x86 & arm64 macos today + binary <- tryCatch( # Somehow 
the test program system2 call errors on the sanitizer builds # so globally handle the possibility that this could fail { errs <- compile_test_program(test_program) - determine_binary_from_stderr(errs) + openssl_version <- determine_binary_from_stderr(errs) + arch <- ifelse(identical(os, "darwin"), paste0("-", arch, "-"), "-") + if (is.null(openssl_version)) NULL else paste0(os, arch, openssl_version) }, error = function(e) { cat("*** Unable to find libcurl and openssl\n") @@ -168,17 +178,20 @@ select_binary <- function(os = tolower(Sys.info()[["sysname"]]), } else { # No binary available for arch cat(sprintf("*** Building on %s %s\n", os, arch)) - NULL + binary <- NULL } + return(binary) } # This tests that curl and OpenSSL are present (bc we can include their headers) # and it checks for other versions/features and raises errors that we grep for test_for_curl_and_openssl <- " +#ifndef __APPLE__ #include <ciso646> #ifdef _LIBCPP_VERSION #error Using libc++ #endif +#endif #include <curl/curl.h> #include <openssl/opensslv.h> @@ -194,11 +207,14 @@ test_for_curl_and_openssl <- " " compile_test_program <- function(code) { - # Note: if we wanted to check for openssl on macOS, we'd have to set the brew - # path as a -I directory. But since we (currently) only run this code to - # determine whether we can download a Linux binary, it's not relevant. 
+ openssl_dir <- "" + if (on_macos) { + openssl_root_dir <- get_macos_openssl_dir() + openssl_dir <- paste0("-I", openssl_root_dir, "/include") + } runner <- paste( R_CMD_config("CXX17"), + openssl_dir, R_CMD_config("CPPFLAGS"), R_CMD_config("CXX17FLAGS"), R_CMD_config("CXX17STD"), @@ -208,17 +224,34 @@ compile_test_program <- function(code) { suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner), stdout = FALSE, stderr = TRUE)) } +get_macos_openssl_dir <- function() { + openssl_root_dir <- Sys.getenv("OPENSSL_ROOT_DIR", NA) + header <- "openssl/opensslv.h" + if (is.na(openssl_root_dir) || !file.exists(file.path(openssl_root_dir, "include", header))) { + # try to guess default openssl include dir based on CRAN's build script + # https://github.com/R-macos/recipes/blob/master/build.sh#L35 + if (identical(Sys.info()[["machine"]], "arm64") && file.exists(file.path("/opt/R/arm64/include", header))) { + openssl_root_dir <- "/opt/R/arm64" + } else if (identical(Sys.info()[["machine"]], "x86_64") && file.exists(file.path("/opt/R/x86_64/include", header))) { + openssl_root_dir <- "/opt/R/x86_64" + } else { + openssl_root_dir <- "/usr/local" + } + } + return(openssl_root_dir) +} + # (built with newer devtoolset but older glibc (2.17) for broader compatibility,# like manylinux2014) determine_binary_from_stderr <- function(errs) { if (is.null(attr(errs, "status"))) { # There was no error in compiling: so we found libcurl and OpenSSL >= 1.1, # openssl is < 3.0 cat("*** Found libcurl and OpenSSL >= 1.1\n") - return("linux-openssl-1.1") + return("openssl-1.1") # Else, check for dealbreakers: - } else if (any(grepl("Using libc++", errs, fixed = TRUE))) { - # Our binaries are all built with GNU stdlib so they fail with libc++ - cat("*** Found libc++\n") + } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) { + # Our linux binaries are all built with GNU stdlib so they fail with libc++ + cat("*** Linux binaries incompatible with libc++\n") 
return(NULL) } else if (header_not_found("curl/curl", errs)) { cat("*** libcurl not found\n") @@ -231,11 +264,15 @@ determine_binary_from_stderr <- function(errs) { return(NULL) # Else, determine which other binary will work } else if (any(grepl("Using OpenSSL version 1.0", errs))) { + if (on_macos) { + cat("*** OpenSSL 1.0 is not supported on macOS\n") + return(NULL) + } cat("*** Found libcurl and OpenSSL < 1.1\n") - return("linux-openssl-1.0") + return("openssl-1.0") } else if (any(grepl("Using OpenSSL version 3", errs))) { cat("*** Found libcurl and OpenSSL >= 3.0.0\n") - return("linux-openssl-3.0") + return("openssl-3.0") } NULL } @@ -248,6 +285,11 @@ header_not_found <- function(header, errs) { #### start distro #### distro <- function() { + # This is not part of distro but needed to enable prebuilt binaries on macos + if (on_macos) { + return(list(id = "darwin", arch = tolower(Sys.info()[["machine"]]))) + } + # The code in this script is a (potentially stale) copy of the distro package if (requireNamespace("distro", quietly = TRUE)) { # Use the version from the package, which may be updated from this @@ -487,8 +529,10 @@ build_libarrow <- function(src_dir, dst_dir) { # It failed :( cat("**** Error building Arrow C++.", "\n") if (quietly) { - cat("**** Printing contents of build log because the build failed", - "while ARROW_R_DEV was set to FALSE\n") + cat( + "**** Printing contents of build log because the build failed", + "while ARROW_R_DEV was set to FALSE\n" + ) cat(readLines(build_log_path), sep = "\n") cat("**** Complete build log may still be present at", build_log_path, "\n") } @@ -503,7 +547,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { # If not found, download it cat("**** cmake\n") CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4") - if (tolower(Sys.info()[["sysname"]]) %in% "darwin") { + if (on_macos) { postfix <- "-macos-universal.tar.gz" } else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) { postfix <- 
"-linux-aarch64.tar.gz" diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index e099dcdad4acf..c9571b58b7bbf 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -21,8 +21,10 @@ # Flag so that we just load the functions and don't evaluate them like we do # when called from configure.R TESTING <- TRUE - -source("nixlibs.R", local = TRUE) +# The functions use `on_macos` from the env they were sourced in, so we need tool +# explicitly set it in that environment. +nixlibs_env <- environment() +source("nixlibs.R", local = nixlibs_env) test_that("identify_binary() based on LIBARROW_BINARY", { expect_null(identify_binary("FALSE")) @@ -31,10 +33,6 @@ test_that("identify_binary() based on LIBARROW_BINARY", { }) test_that("select_binary() based on system", { - expect_output( - expect_null(select_binary("darwin", "x86_64")), # Not built today - "Building on darwin x86_64" - ) expect_output( expect_null(select_binary("linux", arch = "aarch64")), # Not built today "Building on linux aarch64" @@ -52,21 +50,30 @@ test_that("determine_binary_from_stderr", { expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("int a;")), - "linux-openssl-1.1" + "openssl-1.1" ), "Found libcurl and OpenSSL >= 1.1" ) + + nixlibs_env$on_macos <- FALSE expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 1.0")), - "linux-openssl-1.0" + "openssl-1.0" ), "Found libcurl and OpenSSL < 1.1" ) + nixlibs_env$on_macos <- TRUE + expect_output( + expect_null( + determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 1.0")) + ), + "OpenSSL 1.0 is not supported on macOS" + ) expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 3")), - "linux-openssl-3.0" + "openssl-3.0" ), "Found libcurl and OpenSSL >= 3.0.0" ) @@ -79,6 +86,7 @@ test_that("determine_binary_from_stderr", { }) test_that("select_binary() with test 
program", { + nixlibs_env$on_macos <- FALSE expect_output( expect_identical( select_binary("linux", "x86_64", "int a;"), @@ -100,13 +108,49 @@ test_that("select_binary() with test program", { ), "Found libcurl and OpenSSL >= 3.0.0" ) + nixlibs_env$on_macos <- TRUE + expect_output( + expect_identical( + select_binary("darwin", "x86_64", "int a;"), + "darwin-x86_64-openssl-1.1" + ), + "Found libcurl and OpenSSL >= 1.1" + ) + expect_output( + expect_identical( + select_binary("darwin", "x86_64", "#error Using OpenSSL version 3"), + "darwin-x86_64-openssl-3.0" + ), + "Found libcurl and OpenSSL >= 3.0.0" + ) + expect_output( + expect_identical( + select_binary("darwin", "arm64", "int a;"), + "darwin-arm64-openssl-1.1" + ), + "Found libcurl and OpenSSL >= 1.1" + ) + expect_output( + expect_identical( + select_binary("darwin", "arm64", "#error Using OpenSSL version 3"), + "darwin-arm64-openssl-3.0" + ), + "Found libcurl and OpenSSL >= 3.0.0" + ) + expect_output( + expect_null( + select_binary("darwin", "x86_64", "#error Using OpenSSL version 1.0") + ), + "OpenSSL 1.0 is not supported on macOS" + ) }) test_that("check_allowlist", { tf <- tempfile() - cat("tu$\n^cent\n", file = tf) + cat("tu$\n^cent\n^dar\n", file = tf) expect_true(check_allowlist("ubuntu", tf)) expect_true(check_allowlist("centos", tf)) + expect_true(check_allowlist("darwin", tf)) expect_false(check_allowlist("redhat", tf)) # remote allowlist doesn't have this expect_true(check_allowlist("redhat", tempfile())) # remote allowlist doesn't exist, so we fall back to the default list, which contains redhat expect_false(check_allowlist("debian", tempfile()))