diff --git a/cpp/Brewfile b/cpp/Brewfile index 58015d2121b5b..0f5527985364c 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -19,7 +19,9 @@ brew "aws-sdk-cpp" brew "bash" brew "boost" brew "brotli" +brew "bzip2" brew "c-ares" +brew "curl" brew "ccache" brew "cmake" brew "flatbuffers" @@ -29,14 +31,18 @@ brew "googletest" brew "grpc" brew "llvm@14" brew "lz4" +brew "mimalloc" brew "ninja" brew "node" brew "openssl@3" +brew "pkg-config" brew "protobuf" brew "python" brew "rapidjson" +brew "re2" brew "snappy" brew "thrift" +brew "utf8proc" brew "wget" brew "xsimd" brew "zstd" diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 5531415ac2277..e90c00612caa3 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE # Don't complain about optimization passes that were not possible set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed") - # Avoid clang / libc++ error about C++17 aligned allocation on macOS. - # See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0 - # for details. if(APPLE) - set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new") + # Avoid clang / libc++ error about C++17 aligned allocation on macOS. + # See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0 + # for details. 
+ string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new") + + if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) + # Avoid C++17 std::get 'not available' issue on macOS 10.13 + # This will be required until at least R 4.4 is released and + # CRAN (hopefully) stops checking on 10.13 + string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY") + endif() endif() endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 85c0337d108be..6d6a2bf7751ee 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1308,6 +1308,26 @@ macro(build_snappy) set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + # Snappy unconditionally enables Werror when building with clang; this can lead + # to build failures by way of new compiler warnings. This adds a flag to disable + # Werror to the very end of the invocation to override the snappy internal setting.
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + list(APPEND + SNAPPY_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS_${CONFIG}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error" + ) + endforeach() + endif() + + if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) + # On macOS 10.13 we need to explicitly add an include (see snappy.diff) to avoid a missing include error + # This can be removed once CRAN no longer checks on macOS 10.13 + find_program(PATCH patch REQUIRED) + set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/snappy.diff) + else() + set(SNAPPY_PATCH_COMMAND) + endif() externalproject_add(snappy_ep ${EP_COMMON_OPTIONS} @@ -1315,6 +1335,7 @@ macro(build_snappy) INSTALL_DIR ${SNAPPY_PREFIX} URL ${SNAPPY_SOURCE_URL} URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${SNAPPY_PATCH_COMMAND} CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") diff --git a/cpp/cmake_modules/snappy.diff b/cpp/cmake_modules/snappy.diff new file mode 100644 index 0000000000000..f86e2bb19780c --- /dev/null +++ b/cpp/cmake_modules/snappy.diff @@ -0,0 +1,12 @@ +diff --git a/snappy.cc b/snappy.cc +index d414718..5b0d0d6 100644 +--- a/snappy.cc ++++ b/snappy.cc +@@ -83,6 +83,7 @@ + #include + #include + #include ++#include + + namespace snappy { + diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 52d302592b55c..56163f35d2499 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -101,9 +101,8 @@ ARROW_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709 ARROW_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806 ARROW_RE2_BUILD_VERSION=2022-06-01 ARROW_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f -# 1.1.9 is patched to implement https://github.com/google/snappy/pull/148 if this is bumped, remove the patch -ARROW_SNAPPY_BUILD_VERSION=1.1.9
-ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8b4f1c962b478b6e06e7 +ARROW_SNAPPY_BUILD_VERSION=1.1.10 +ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90 ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0 ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628 ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index aebe321d613ab..af084ea215621 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -24,6 +24,7 @@ cpp/build-support/iwyu/* cpp/cmake_modules/FindPythonLibsNew.cmake cpp/cmake_modules/SnappyCMakeLists.txt cpp/cmake_modules/SnappyConfig.h +cpp/cmake_modules/snappy.diff cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake cpp/src/parquet/.parquetcppversion cpp/src/generated/parquet_constants.cpp diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index faf77a1168d1b..54f676fd49cd9 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -307,7 +307,8 @@ on: stopifnot(packageVersion("arrow") == {{ '"${{needs.source.outputs.pkg_version}}"' }}) {% endmacro %} -{%- macro github_setup_local_r_repo(get_nix, get_win) -%} +{%- macro github_setup_local_r_repo(get_nix, get_win, get_mac=False) -%} +# TODO: improve arg handling - name: Setup local repo shell: bash run: mkdir repo @@ -327,6 +328,17 @@ on: path: repo/libarrow/bin/linux-openssl-{{ openssl_version }} {% endfor %} {% endif %} + {% if get_mac %} + {% for openssl_version in ["1.1", "3.0"] %} + {% for arch in ["x86_64", "arm64"] %} + - name: Get macOS {{ arch }} OpenSSL {{ openssl_version }} binary + uses: actions/download-artifact@v3 + with: + name: r-lib__libarrow__bin__darwin-{{arch}}-openssl-{{ openssl_version }} + path: repo/libarrow/bin/darwin-{{ arch }}-openssl-{{ openssl_version }} + {% endfor %} + {% endfor %} + {% endif %} - name: Get src pkg uses: 
actions/download-artifact@v3 with: diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index dbe21ffb6b160..760e3b6da448d 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -56,6 +56,59 @@ jobs: name: r-pkg__src__contrib path: arrow/r/arrow_*.tar.gz + macos-cpp: + name: C++ Binary macOS OpenSSL {{ '${{ matrix.openssl }}' }} {{ '${{ matrix.platform.arch }}' }} + + runs-on: {{ '${{ matrix.platform.runs_on }}' }} + + needs: source + strategy: + fail-fast: false + matrix: + platform: + - { runs_on: ["self-hosted", "macos-10.13"], arch: "x86_64" } + + - { runs_on: ["self-hosted", "macOS", "arm64", "devops-managed"], arch: "arm64" } + openssl: ['3.0', '1.1'] + + steps: + {{ macros.github_checkout_arrow(action_v="3")|indent }} + {{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.pkg_version }}')|indent }} + - name: Install Deps + if: {{ "${{ !contains(matrix.platform.runs_on, 'macos-10.13') }}" }} + run: | + brew install sccache ninja + brew install openssl@{{ '${{ matrix.openssl }}' }} + - name: Build libarrow + shell: bash + env: + {{ macros.github_set_sccache_envvars()|indent(8) }} + MACOSX_DEPLOYMENT_TARGET: "10.13" + ARROW_S3: ON + ARROW_GCS: ON + ARROW_DEPENDENCY_SOURCE: BUNDLED + CMAKE_GENERATOR: Ninja + LIBARROW_MINIMAL: false + run: | + sccache --start-server + export EXTRA_CMAKE_FLAGS="-DOPENSSL_ROOT_DIR=$(brew --prefix openssl@{{ '${{ matrix.openssl }}' }})" + cd arrow + r/inst/build_arrow_static.sh + - name: Bundle libarrow + shell: bash + env: + PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip + VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }} + run: | + cd arrow/r/libarrow/dist + zip -r $PKG_FILE lib/ include/ + + - name: Upload binary artifact + uses: actions/upload-artifact@v3 + with: + name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' }}-openssl-{{ '${{ matrix.openssl }}' }} + path: arrow/r/libarrow/dist/arrow-*.zip + linux-cpp: 
name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }} runs-on: ubuntu-latest @@ -135,7 +188,7 @@ jobs: path: build/arrow-*.zip r-packages: - needs: [source, windows-cpp] + needs: [source, windows-cpp, macos-cpp] name: {{ '${{ matrix.platform.name }} ${{ matrix.r_version.r }}' }} runs-on: {{ '${{ matrix.platform.runs_on }}' }} strategy: @@ -167,7 +220,7 @@ jobs: rig system setup-user-lib rig system add-pak - {{ macros.github_setup_local_r_repo(false, true)|indent }} + {{ macros.github_setup_local_r_repo(false, true, true)|indent }} - name: Prepare Dependency Installation shell: bash @@ -178,18 +231,19 @@ jobs: with: working-directory: 'arrow' extra-packages: cpp11 - - name: Install sccache - if: startsWith(matrix.platform, 'macos') - run: brew install sccache + - name: Set CRAN like openssl + if: contains(matrix.platform.runs_on, 'arm64') + run: | + # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the + # default setting of the brew --prefix as root dir to avoid version conflicts. 
+ echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV - name: Build Binary id: build shell: Rscript {0} env: - NOT_CRAN: "true" # actions/setup-r sets this implicitly + NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" - FORCE_AUTOBREW: "true" # this is ignored on windows - # sccache for macos - {{ macros.github_set_sccache_envvars()|indent(8) }} + LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated run: | on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" @@ -213,8 +267,10 @@ jobs: INSTALL_opts = INSTALL_opts ) + # Test library(arrow) + arrow_info() read_parquet(system.file("v0.7.1.parquet", package = "arrow")) # encode contrib.url for artifact name @@ -233,7 +289,6 @@ jobs: with: name: r-pkg{{ '${{ steps.build.outputs.path }}' }} path: arrow_* - test-linux-binary: needs: [source, linux-cpp] name: Test binary {{ '${{ matrix.config.image }}' }} @@ -291,7 +346,10 @@ jobs: with: name: r-pkg_centos7 path: arrow_* + test-centos-binary: + # arrow binary package not on ppm currently see #37922 + if: false needs: test-linux-binary runs-on: ubuntu-latest container: "rstudio/r-base:4.2-centos7" @@ -317,7 +375,8 @@ jobs: read_parquet(system.file("v0.7.1.parquet", package = "arrow")) print(arrow_info()) - test-source: + #TODO test macos source build? + test-linux-source: needs: source name: Test linux source build runs-on: ubuntu-latest @@ -367,7 +426,7 @@ jobs: upload-binaries: # Only upload binaries if all tests pass. 
- needs: [r-packages, test-source, test-linux-binary, test-centos-binary] + needs: [r-packages, test-linux-source, test-linux-binary] name: Upload artifacts runs-on: ubuntu-latest steps: diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 859ff8ddb5b44..798932db23323 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -994,6 +994,10 @@ tasks: - r-lib__libarrow__bin__linux-openssl-1.0__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__linux-openssl-1.1__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__linux-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-arm64-openssl-1.1__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip + - r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.1__arrow_{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip - r-pkg__bin__macosx__contrib__4.1__arrow_{no_rc_r_version}\.tgz diff --git a/r/configure b/r/configure index 593f60bbddad9..d244b1a7c148f 100755 --- a/r/configure +++ b/r/configure @@ -39,8 +39,8 @@ # # * Installing a released version from source, as from CRAN, with # no other prior setup -# * On macOS, autobrew is used to retrieve libarrow and dependencies -# * On Linux, the nixlibs.R build script will download or build +# * On macOS and Linux, the nixlibs.R build script will download +# or build libarrow and dependencies # * Installing a released version but first installing libarrow. # It will use pkg-config and brew to search for libraries. # * Installing a development version from source as a user. 
@@ -65,8 +65,6 @@ PKG_TEST_HEADER="" # Some env vars that control the build (all logical, case insensitive) # Development mode, also increases verbosity in the bundled build ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` -# autobrew is how mac binaries are built on CRAN; FORCE ensures we use it here -FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'` # The bundled build compiles arrow C++ from source; FORCE ensures we don't pick up # any other packages that may be found on the system FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'` @@ -141,19 +139,11 @@ fi find_or_build_libarrow () { if [ "$FORCE_BUNDLED_BUILD" = "true" ]; then do_bundled_build - elif [ "$FORCE_AUTOBREW" = "true" ]; then - do_autobrew else find_arrow if [ "$_LIBARROW_FOUND" = "false" ]; then # If we haven't found a suitable version of libarrow, build it - if [ "$UNAME" = "Darwin" ] && ! echo $VERSION | grep -q "000"; then - # Only autobrew on release version (for testing, use FORCE_AUTOBREW above) - # (dev versions end in .9000, and nightly gets something like .10000xxx) - do_autobrew - else - do_bundled_build - fi + do_bundled_build fi fi } @@ -175,12 +165,6 @@ find_arrow () { # 2. Use pkg-config to find arrow on the system _LIBARROW_FOUND="`${PKG_CONFIG} --variable=prefix --silence-errors ${PKG_CONFIG_NAME}`" echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND" - elif brew --prefix ${PKG_BREW_NAME} > /dev/null 2>&1; then - # 3. 
On macOS, look for Homebrew apache-arrow - # (note that if you have pkg-config, homebrew arrow may have already been found) - _LIBARROW_FOUND=`brew --prefix ${PKG_BREW_NAME}` - echo "*** Trying Arrow C++ found by Homebrew: ${_LIBARROW_FOUND}" - export PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" else _LIBARROW_FOUND="false" fi @@ -247,34 +231,6 @@ do_bundled_build () { fi } -do_autobrew () { - echo "*** Downloading ${PKG_BREW_NAME}" - - # Setup for local autobrew testing - if [ -f "tools/apache-arrow.rb" ]; then - # If you want to use a local apache-arrow.rb formula, do - # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools - # before R CMD build or INSTALL (assuming a local checkout of the apache/arrow repository). - # If you have this, you should use the local autobrew script so they match. - cp tools/autobrew . - fi - - if [ -f "autobrew" ]; then - echo "**** Using local manifest for ${PKG_BREW_NAME}" - else - if ! curl -sfL "https://autobrew.github.io/scripts/$PKG_BREW_NAME" > autobrew; then - echo "Failed to download manifest for ${PKG_BREW_NAME}" - # Fall back to the local copy - cp tools/autobrew . - fi - fi - if ! . 
autobrew; then - echo "Failed to retrieve binary for ${PKG_BREW_NAME}" - fi - # autobrew sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` - # TODO: move PKG_LIBS and PKG_CFLAGS out of autobrew and use set_pkg_vars -} - # Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` # either from pkg-config or by inferring things about the directory in $1 set_pkg_vars () { @@ -298,6 +254,11 @@ set_pkg_vars () { if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi + + if [ "$UNAME" = "Darwin" ] && expr "$(sw_vers -productVersion)" : '10\.13' >/dev/null; then + # avoid C++17 availability warnings on macOS < 11 + PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY" + fi } # If we have pkg-config, it will tell us what libarrow needs diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt index bd9f0c1b2c084..9c368e6ed15a2 100644 --- a/r/tools/nixlibs-allowlist.txt +++ b/r/tools/nixlibs-allowlist.txt @@ -2,3 +2,4 @@ ubuntu centos redhat rhel +darwin diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 3d908c05cab07..60deca05cd172 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -28,6 +28,8 @@ if (test_mode && is.na(VERSION)) { } dev_version <- package_version(VERSION)[1, 4] +on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" + # Small dev versions are added for R-only changes during CRAN submission.
if (is.na(dev_version) || dev_version < "100") { @@ -94,8 +96,10 @@ download_binary <- function(lib) { } } else { if (!quietly) { - cat(sprintf("*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", - VERSION, lib, binary_url)) + cat(sprintf( + "*** Downloading libarrow binary failed for version %s (%s)\n at %s\n", + VERSION, lib, binary_url + )) } libfile <- NULL } @@ -114,6 +118,10 @@ download_binary <- function(lib) { # * "linux-openssl-1.0" (OpenSSL 1.0) # * "linux-openssl-1.1" (OpenSSL 1.1) # * "linux-openssl-3.0" (OpenSSL 3.0) +# * "darwin-x86_64-openssl-1.1" (OpenSSL 1.1) +# * "darwin-x86_64-openssl-3.0" (OpenSSL 3.0) +# * "darwin-arm64-openssl-1.1" (OpenSSL 1.1) +# * "darwin-arm64-openssl-3.0" (OpenSSL 3.0) # These string values, along with `NULL`, are the potential return values of # this function. identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = distro()) { @@ -142,7 +150,7 @@ check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apa # Try a remote allowlist so that we can add/remove without a release suppressWarnings(readLines(allowed)), # Fallback to default: allowed only on Ubuntu and CentOS/RHEL - error = function(e) c("ubuntu", "centos", "redhat", "rhel") + error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin") ) # allowlist should contain valid regular expressions (plain strings ok too) any(grepl(paste(allowlist, collapse = "|"), os)) @@ -151,14 +159,16 @@ check_allowlist <- function(os, allowed = "https://raw.githubusercontent.com/apa select_binary <- function(os = tolower(Sys.info()[["sysname"]]), arch = tolower(Sys.info()[["machine"]]), test_program = test_for_curl_and_openssl) { - if (identical(os, "linux") && identical(arch, "x86_64")) { - # We only host x86 linux binaries today - tryCatch( + if (identical(os, "darwin") || (identical(os, "linux") && identical(arch, "x86_64"))) { + # We only host x86 linux binaries and x86 & arm64 macos today + binary <- tryCatch( # Somehow
the test program system2 call errors on the sanitizer builds # so globally handle the possibility that this could fail { errs <- compile_test_program(test_program) - determine_binary_from_stderr(errs) + openssl_version <- determine_binary_from_stderr(errs) + arch <- ifelse(identical(os, "darwin"), paste0("-", arch, "-"), "-") + if (is.null(openssl_version)) NULL else paste0(os, arch, openssl_version) }, error = function(e) { cat("*** Unable to find libcurl and openssl\n") @@ -168,17 +178,20 @@ select_binary <- function(os = tolower(Sys.info()[["sysname"]]), } else { # No binary available for arch cat(sprintf("*** Building on %s %s\n", os, arch)) - NULL + binary <- NULL } + return(binary) } # This tests that curl and OpenSSL are present (bc we can include their headers) # and it checks for other versions/features and raises errors that we grep for test_for_curl_and_openssl <- " +#ifndef __APPLE__ #include #ifdef _LIBCPP_VERSION #error Using libc++ #endif +#endif #include #include @@ -194,11 +207,14 @@ test_for_curl_and_openssl <- " " compile_test_program <- function(code) { - # Note: if we wanted to check for openssl on macOS, we'd have to set the brew - # path as a -I directory. But since we (currently) only run this code to - # determine whether we can download a Linux binary, it's not relevant.
+ openssl_dir <- "" + if (on_macos) { + openssl_root_dir <- get_macos_openssl_dir() + openssl_dir <- paste0("-I", openssl_root_dir, "/include") + } runner <- paste( R_CMD_config("CXX17"), + openssl_dir, R_CMD_config("CPPFLAGS"), R_CMD_config("CXX17FLAGS"), R_CMD_config("CXX17STD"), @@ -208,17 +224,34 @@ compile_test_program <- function(code) { suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner), stdout = FALSE, stderr = TRUE)) } +get_macos_openssl_dir <- function() { + openssl_root_dir <- Sys.getenv("OPENSSL_ROOT_DIR", NA) + header <- "openssl/opensslv.h" + if (is.na(openssl_root_dir) || !file.exists(file.path(openssl_root_dir, "include", header))) { + # try to guess default openssl include dir based on CRAN's build script + # https://github.com/R-macos/recipes/blob/master/build.sh#L35 + if (identical(Sys.info()[["machine"]], "arm64") && file.exists(file.path("/opt/R/arm64/include", header))) { + openssl_root_dir <- "/opt/R/arm64" + } else if (identical(Sys.info()[["machine"]], "x86_64") && file.exists(file.path("/opt/R/x86_64/include", header))) { + openssl_root_dir <- "/opt/R/x86_64" + } else { + openssl_root_dir <- "/usr/local" + } + } + return(openssl_root_dir) +} + # (built with newer devtoolset but older glibc (2.17) for broader compatibility,# like manylinux2014) determine_binary_from_stderr <- function(errs) { if (is.null(attr(errs, "status"))) { # There was no error in compiling: so we found libcurl and OpenSSL >= 1.1, # openssl is < 3.0 cat("*** Found libcurl and OpenSSL >= 1.1\n") - return("linux-openssl-1.1") + return("openssl-1.1") # Else, check for dealbreakers: - } else if (any(grepl("Using libc++", errs, fixed = TRUE))) { - # Our binaries are all built with GNU stdlib so they fail with libc++ - cat("*** Found libc++\n") + } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) { + # Our linux binaries are all built with GNU stdlib so they fail with libc++ + cat("*** Linux binaries incompatible with libc++\n")
return(NULL) } else if (header_not_found("curl/curl", errs)) { cat("*** libcurl not found\n") @@ -231,11 +264,15 @@ determine_binary_from_stderr <- function(errs) { return(NULL) # Else, determine which other binary will work } else if (any(grepl("Using OpenSSL version 1.0", errs))) { + if (on_macos) { + cat("*** OpenSSL 1.0 is not supported on macOS\n") + return(NULL) + } cat("*** Found libcurl and OpenSSL < 1.1\n") - return("linux-openssl-1.0") + return("openssl-1.0") } else if (any(grepl("Using OpenSSL version 3", errs))) { cat("*** Found libcurl and OpenSSL >= 3.0.0\n") - return("linux-openssl-3.0") + return("openssl-3.0") } NULL } @@ -248,6 +285,11 @@ header_not_found <- function(header, errs) { #### start distro #### distro <- function() { + # This is not part of distro but needed to enable prebuilt binaries on macos + if (on_macos) { + return(list(id = "darwin", arch = tolower(Sys.info()[["machine"]]))) + } + # The code in this script is a (potentially stale) copy of the distro package if (requireNamespace("distro", quietly = TRUE)) { # Use the version from the package, which may be updated from this @@ -487,8 +529,10 @@ build_libarrow <- function(src_dir, dst_dir) { # It failed :( cat("**** Error building Arrow C++.", "\n") if (quietly) { - cat("**** Printing contents of build log because the build failed", - "while ARROW_R_DEV was set to FALSE\n") + cat( + "**** Printing contents of build log because the build failed", + "while ARROW_R_DEV was set to FALSE\n" + ) cat(readLines(build_log_path), sep = "\n") cat("**** Complete build log may still be present at", build_log_path, "\n") } @@ -503,7 +547,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") { # If not found, download it cat("**** cmake\n") CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4") - if (tolower(Sys.info()[["sysname"]]) %in% "darwin") { + if (on_macos) { postfix <- "-macos-universal.tar.gz" } else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) { postfix <- 
"-linux-aarch64.tar.gz" diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index e099dcdad4acf..c9571b58b7bbf 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -21,8 +21,10 @@ # Flag so that we just load the functions and don't evaluate them like we do # when called from configure.R TESTING <- TRUE - -source("nixlibs.R", local = TRUE) +# The functions use `on_macos` from the env they were sourced in, so we need tool +# explicitly set it in that environment. +nixlibs_env <- environment() +source("nixlibs.R", local = nixlibs_env) test_that("identify_binary() based on LIBARROW_BINARY", { expect_null(identify_binary("FALSE")) @@ -31,10 +33,6 @@ test_that("identify_binary() based on LIBARROW_BINARY", { }) test_that("select_binary() based on system", { - expect_output( - expect_null(select_binary("darwin", "x86_64")), # Not built today - "Building on darwin x86_64" - ) expect_output( expect_null(select_binary("linux", arch = "aarch64")), # Not built today "Building on linux aarch64" @@ -52,21 +50,30 @@ test_that("determine_binary_from_stderr", { expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("int a;")), - "linux-openssl-1.1" + "openssl-1.1" ), "Found libcurl and OpenSSL >= 1.1" ) + + nixlibs_env$on_macos <- FALSE expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 1.0")), - "linux-openssl-1.0" + "openssl-1.0" ), "Found libcurl and OpenSSL < 1.1" ) + nixlibs_env$on_macos <- TRUE + expect_output( + expect_null( + determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 1.0")) + ), + "OpenSSL 1.0 is not supported on macOS" + ) expect_output( expect_identical( determine_binary_from_stderr(compile_test_program("#error Using OpenSSL version 3")), - "linux-openssl-3.0" + "openssl-3.0" ), "Found libcurl and OpenSSL >= 3.0.0" ) @@ -79,6 +86,7 @@ test_that("determine_binary_from_stderr", { }) test_that("select_binary() with test 
program", { + nixlibs_env$on_macos <- FALSE expect_output( expect_identical( select_binary("linux", "x86_64", "int a;"), @@ -100,13 +108,49 @@ test_that("select_binary() with test program", { ), "Found libcurl and OpenSSL >= 3.0.0" ) + nixlibs_env$on_macos <- TRUE + expect_output( + expect_identical( + select_binary("darwin", "x86_64", "int a;"), + "darwin-x86_64-openssl-1.1" + ), + "Found libcurl and OpenSSL >= 1.1" + ) + expect_output( + expect_identical( + select_binary("darwin", "x86_64", "#error Using OpenSSL version 3"), + "darwin-x86_64-openssl-3.0" + ), + "Found libcurl and OpenSSL >= 3.0.0" + ) + expect_output( + expect_identical( + select_binary("darwin", "arm64", "int a;"), + "darwin-arm64-openssl-1.1" + ), + "Found libcurl and OpenSSL >= 1.1" + ) + expect_output( + expect_identical( + select_binary("darwin", "arm64", "#error Using OpenSSL version 3"), + "darwin-arm64-openssl-3.0" + ), + "Found libcurl and OpenSSL >= 3.0.0" + ) + expect_output( + expect_null( + select_binary("darwin", "x86_64", "#error Using OpenSSL version 1.0") + ), + "OpenSSL 1.0 is not supported on macOS" + ) }) test_that("check_allowlist", { tf <- tempfile() - cat("tu$\n^cent\n", file = tf) + cat("tu$\n^cent\n^dar\n", file = tf) expect_true(check_allowlist("ubuntu", tf)) expect_true(check_allowlist("centos", tf)) + expect_true(check_allowlist("darwin", tf)) expect_false(check_allowlist("redhat", tf)) # remote allowlist doesn't have this expect_true(check_allowlist("redhat", tempfile())) # remote allowlist doesn't exist, so we fall back to the default list, which contains redhat expect_false(check_allowlist("debian", tempfile()))