Skip to content

Commit

Permalink
GH-37923: [R] Move macOS build system to nixlibs.R (#37684)
Browse files Browse the repository at this point in the history
This PR modifies the build system of the R package to no longer rely on auto/homebrew. Instead this PR adds the infrastructure and code paths to use the same type of  pre-compiled libarrow binaries as we use for Linux. The main difference is the use of the binaries even on CRAN (as we previously also used binaries in form of brew bottles).

The addition of the new artifacts to tasks.yml should ensure that they get uploaded to the nightly repo as well as to the artifactory during the release (@ kou please confirm). 

A summary of the changes in this PR:
- update `r/configure` and `r/tools/nixlibs.R` to enable the source build on macOS and usage of precompiled binaries using the existing mechanism to test compile a program to detect the exisitng openssl version
- added tests for the changes in nixlibs.R
- update the binary allow-list
- Add the build jobs for libarrow binaries for arm64 and x86_64 macos with openssl 1.1 and 3.0 to the `r-binary-packages` job
- Use the binaries to build the nightly packages  
- bump snappy version to 1.1.10 (and patch it on 10.13) due to build issues with the current version. This also touches on a number of issues in regards to a sanitizer issue we have had for a long time: #32562 #31766
- Disable the centos binary test step: #37922

Follow up issues:
- #37921
- #37941 
- #37945
* Closes: #37923

Lead-authored-by: Jacob Wujciak-Jens <[email protected]>
Co-authored-by: Jonathan Keane <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
  • Loading branch information
3 people authored Oct 5, 2023
1 parent 0fbfffb commit dd8734d
Show file tree
Hide file tree
Showing 13 changed files with 268 additions and 97 deletions.
6 changes: 6 additions & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ brew "aws-sdk-cpp"
brew "bash"
brew "boost"
brew "brotli"
brew "bzip2"
brew "c-ares"
brew "curl"
brew "ccache"
brew "cmake"
brew "flatbuffers"
Expand All @@ -29,14 +31,18 @@ brew "googletest"
brew "grpc"
brew "llvm@14"
brew "lz4"
brew "mimalloc"
brew "ninja"
brew "node"
brew "openssl@3"
brew "pkg-config"
brew "protobuf"
brew "python"
brew "rapidjson"
brew "re2"
brew "snappy"
brew "thrift"
brew "utf8proc"
brew "wget"
brew "xsimd"
brew "zstd"
15 changes: 11 additions & 4 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE
# Don't complain about optimization passes that were not possible
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed")

# Avoid clang / libc++ error about C++17 aligned allocation on macOS.
# See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
# for details.
if(APPLE)
set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new")
# Avoid clang / libc++ error about C++17 aligned allocation on macOS.
# See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
# for details.
string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new")

if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
# Avoid C++17 std::get 'not available' issue on macOS 10.13
# This will be required until atleast R 4.4 is released and
# CRAN (hopefully) stops checking on 10.13
string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY")
endif()
endif()
endif()

Expand Down
21 changes: 21 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1308,13 +1308,34 @@ macro(build_snappy)
set(SNAPPY_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF
"-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
# Snappy unconditionaly enables Werror when building with clang this can lead
# to build failues by way of new compiler warnings. This adds a flag to disable
# Werror to the very end of the invocation to override the snappy internal setting.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
list(APPEND
SNAPPY_CMAKE_ARGS
"-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error"
)
endforeach()
endif()

if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
# On macOS 10.13 we need to explicitly add <functional> to avoid a missing include error
# This can be removed once CRAN no longer checks on macOS 10.13
find_program(PATCH patch REQUIRED)
set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/snappy.diff)
else()
set(SNAPPY_PATCH_COMMAND)
endif()

externalproject_add(snappy_ep
${EP_COMMON_OPTIONS}
BUILD_IN_SOURCE 1
INSTALL_DIR ${SNAPPY_PREFIX}
URL ${SNAPPY_SOURCE_URL}
URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}"
PATCH_COMMAND ${SNAPPY_PATCH_COMMAND}
CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")

Expand Down
12 changes: 12 additions & 0 deletions cpp/cmake_modules/snappy.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/snappy.cc b/snappy.cc
index d414718..5b0d0d6 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -83,6 +83,7 @@
#include <string>
#include <utility>
#include <vector>
+#include <functional>

namespace snappy {

5 changes: 2 additions & 3 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,8 @@ ARROW_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709
ARROW_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806
ARROW_RE2_BUILD_VERSION=2022-06-01
ARROW_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f
# 1.1.9 is patched to implement https://github.com/google/snappy/pull/148 if this is bumped, remove the patch
ARROW_SNAPPY_BUILD_VERSION=1.1.9
ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8b4f1c962b478b6e06e7
ARROW_SNAPPY_BUILD_VERSION=1.1.10
ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90
ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0
ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628
ARROW_S2N_TLS_BUILD_VERSION=v1.3.35
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cpp/build-support/iwyu/*
cpp/cmake_modules/FindPythonLibsNew.cmake
cpp/cmake_modules/SnappyCMakeLists.txt
cpp/cmake_modules/SnappyConfig.h
cpp/cmake_modules/snappy.diff
cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake
cpp/src/parquet/.parquetcppversion
cpp/src/generated/parquet_constants.cpp
Expand Down
14 changes: 13 additions & 1 deletion dev/tasks/macros.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ on:
stopifnot(packageVersion("arrow") == {{ '"${{needs.source.outputs.pkg_version}}"' }})
{% endmacro %}

{%- macro github_setup_local_r_repo(get_nix, get_win) -%}
{%- macro github_setup_local_r_repo(get_nix, get_win, get_mac=False) -%}
# TODO: improve arg handling
- name: Setup local repo
shell: bash
run: mkdir repo
Expand All @@ -327,6 +328,17 @@ on:
path: repo/libarrow/bin/linux-openssl-{{ openssl_version }}
{% endfor %}
{% endif %}
{% if get_mac %}
{% for openssl_version in ["1.1", "3.0"] %}
{% for arch in ["x86_64", "arm64"] %}
- name: Get macOS {{ arch }} OpenSSL {{ openssl_version }} binary
uses: actions/download-artifact@v3
with:
name: r-lib__libarrow__bin__darwin-{{arch}}-openssl-{{ openssl_version }}
path: repo/libarrow/bin/darwin-{{ arch }}-openssl-{{ openssl_version }}
{% endfor %}
{% endfor %}
{% endif %}
- name: Get src pkg
uses: actions/download-artifact@v3
with:
Expand Down
83 changes: 71 additions & 12 deletions dev/tasks/r/github.packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,59 @@ jobs:
name: r-pkg__src__contrib
path: arrow/r/arrow_*.tar.gz

macos-cpp:
name: C++ Binary macOS OpenSSL {{ '${{ matrix.openssl }}' }} {{ '${{ matrix.platform.arch }}' }}

runs-on: {{ '${{ matrix.platform.runs_on }}' }}

needs: source
strategy:
fail-fast: false
matrix:
platform:
- { runs_on: ["self-hosted", "macos-10.13"], arch: "x86_64" }

- { runs_on: ["self-hosted", "macOS", "arm64", "devops-managed"], arch: "arm64" }
openssl: ['3.0', '1.1']

steps:
{{ macros.github_checkout_arrow(action_v="3")|indent }}
{{ macros.github_change_r_pkg_version(is_fork, '${{ needs.source.outputs.pkg_version }}')|indent }}
- name: Install Deps
if: {{ "${{ !contains(matrix.platform.runs_on, 'macos-10.13') }}" }}
run: |
brew install sccache ninja
brew install openssl@{{ '${{ matrix.openssl }}' }}
- name: Build libarrow
shell: bash
env:
{{ macros.github_set_sccache_envvars()|indent(8) }}
MACOSX_DEPLOYMENT_TARGET: "10.13"
ARROW_S3: ON
ARROW_GCS: ON
ARROW_DEPENDENCY_SOURCE: BUNDLED
CMAKE_GENERATOR: Ninja
LIBARROW_MINIMAL: false
run: |
sccache --start-server
export EXTRA_CMAKE_FLAGS="-DOPENSSL_ROOT_DIR=$(brew --prefix openssl@{{ '${{ matrix.openssl }}' }})"
cd arrow
r/inst/build_arrow_static.sh
- name: Bundle libarrow
shell: bash
env:
PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip
VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }}
run: |
cd arrow/r/libarrow/dist
zip -r $PKG_FILE lib/ include/
- name: Upload binary artifact
uses: actions/upload-artifact@v3
with:
name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' }}-openssl-{{ '${{ matrix.openssl }}' }}
path: arrow/r/libarrow/dist/arrow-*.zip

linux-cpp:
name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }}
runs-on: ubuntu-latest
Expand Down Expand Up @@ -135,7 +188,7 @@ jobs:
path: build/arrow-*.zip

r-packages:
needs: [source, windows-cpp]
needs: [source, windows-cpp, macos-cpp]
name: {{ '${{ matrix.platform.name }} ${{ matrix.r_version.r }}' }}
runs-on: {{ '${{ matrix.platform.runs_on }}' }}
strategy:
Expand Down Expand Up @@ -167,7 +220,7 @@ jobs:
rig system setup-user-lib
rig system add-pak
{{ macros.github_setup_local_r_repo(false, true)|indent }}
{{ macros.github_setup_local_r_repo(false, true, true)|indent }}
- name: Prepare Dependency Installation

shell: bash
Expand All @@ -178,18 +231,19 @@ jobs:
with:
working-directory: 'arrow'
extra-packages: cpp11
- name: Install sccache
if: startsWith(matrix.platform, 'macos')
run: brew install sccache
- name: Set CRAN like openssl
if: contains(matrix.platform.runs_on, 'arm64')
run: |
# The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the
# default setting of the brew --prefix as root dir to avoid version conflicts.
echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV
- name: Build Binary
id: build
shell: Rscript {0}
env:
NOT_CRAN: "true" # actions/setup-r sets this implicitly
NOT_CRAN: "false" # actions/setup-r sets this implicitly
ARROW_R_DEV: "true"
FORCE_AUTOBREW: "true" # this is ignored on windows
# sccache for macos
{{ macros.github_set_sccache_envvars()|indent(8) }}
LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated
run: |
on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
Expand All @@ -213,8 +267,10 @@ jobs:
INSTALL_opts = INSTALL_opts
)
# Test
library(arrow)
arrow_info()
read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
# encode contrib.url for artifact name
Expand All @@ -233,7 +289,6 @@ jobs:
with:
name: r-pkg{{ '${{ steps.build.outputs.path }}' }}
path: arrow_*

test-linux-binary:
needs: [source, linux-cpp]
name: Test binary {{ '${{ matrix.config.image }}' }}
Expand Down Expand Up @@ -291,7 +346,10 @@ jobs:
with:
name: r-pkg_centos7
path: arrow_*

test-centos-binary:
# arrow binary package not on ppm currently see #37922
if: false
needs: test-linux-binary
runs-on: ubuntu-latest
container: "rstudio/r-base:4.2-centos7"
Expand All @@ -317,7 +375,8 @@ jobs:
read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
print(arrow_info())
test-source:
#TODO test macos source build?
test-linux-source:
needs: source
name: Test linux source build
runs-on: ubuntu-latest
Expand Down Expand Up @@ -367,7 +426,7 @@ jobs:
upload-binaries:
# Only upload binaries if all tests pass.
needs: [r-packages, test-source, test-linux-binary, test-centos-binary]
needs: [r-packages, test-linux-source, test-linux-binary]
name: Upload artifacts
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 4 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,10 @@ tasks:
- r-lib__libarrow__bin__linux-openssl-1.0__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__linux-openssl-1.1__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__linux-openssl-3.0__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__darwin-arm64-openssl-1.1__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip
- r-pkg__bin__windows__contrib__4.1__arrow_{no_rc_r_version}\.zip
- r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip
- r-pkg__bin__macosx__contrib__4.1__arrow_{no_rc_r_version}\.tgz
Expand Down
Loading

0 comments on commit dd8734d

Please sign in to comment.