Skip to content

Commit

Permalink
Split pyarrow into pyarrow-core, pyarrow and pyarrow-all
Browse files Browse the repository at this point in the history
  • Loading branch information
raulcd committed Apr 30, 2024
1 parent b80a399 commit a632f02
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 35 deletions.
2 changes: 1 addition & 1 deletion recipe/build-pyarrow.bat
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if "%cuda_compiler_version%"=="None" (
if %ERRORLEVEL% neq 0 exit 1
popd

if [%PKG_NAME%] == [pyarrow] (
if [%PKG_NAME%] NEQ [pyarrow-tests] (
rd /s /q %SP_DIR%\pyarrow\tests
)

Expand Down
2 changes: 1 addition & 1 deletion recipe/build-pyarrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ $PYTHON setup.py \
install --single-version-externally-managed \
--record=record.txt

if [[ "$PKG_NAME" == "pyarrow" ]]; then
if [[ "$PKG_NAME" != "pyarrow-tests" ]]; then
rm -r ${SP_DIR}/pyarrow/tests
fi

Expand Down
177 changes: 144 additions & 33 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ outputs:
- LICENSE.txt
summary: C++ libraries for Apache Parquet

- name: pyarrow
- name: pyarrow-core
script: build-pyarrow.sh # [unix]
script: build-pyarrow.bat # [win]
version: {{ version }}
Expand All @@ -653,6 +653,20 @@ outputs:
# not actually missing, but installed into SP_DIR, see tests
- '*/arrow_python.dll' # [win]
- '*/arrow_python_flight.dll' # [win]
# pyarrow-core is built against these optional libraries, but they are not
# shipped with it, which keeps the core package small.
- 'lib/libarrow_acero.*' # [unix]
- 'lib/libarrow_dataset.*' # [unix]
- 'lib/libarrow_substrait.*' # [unix]
- 'lib/libarrow_flight.*' # [unix]
- 'lib/libparquet.*' # [unix]
- 'lib/libgandiva.*' # [unix]
- 'Library/lib/arrow_acero.dll' # [win]
- 'Library/lib/arrow_dataset.dll' # [win]
- 'Library/lib/arrow_substrait.dll' # [win]
- 'Library/lib/arrow_flight.dll' # [win]
- 'Library/lib/parquet.dll' # [win]
- 'Library/lib/gandiva.dll' # [win]
requirements:
build:
- {{ compiler("c") }}
Expand All @@ -667,17 +681,12 @@ outputs:
- cmake
- ninja
host:
- {{ pin_subpackage("libarrow-all", exact=True) }}
- clangdev {{ llvm_version }}
- llvmdev {{ llvm_version }}
- zlib
- cython
- numpy
- python
- setuptools
- setuptools-scm
run:
# full set of libs because run-exports from libarrow-all aren't picked up
# All libarrow packages are listed under host so that pyarrow is built once
# with all capabilities. They are pinned to the exact builds produced by this
# recipe, but they are deliberately not repeated under run for pyarrow-core,
# because we want a low footprint for the pyarrow-core package.
# They cannot be listed under build instead: the exact builds produced by this
# recipe must be the ones linked against, otherwise we get mismatching
# package errors.
- {{ pin_subpackage("libarrow", exact=True) }}
- {{ pin_subpackage("libarrow-acero", exact=True) }}
- {{ pin_subpackage("libarrow-dataset", exact=True) }}
Expand All @@ -686,22 +695,27 @@ outputs:
- {{ pin_subpackage("libarrow-gandiva", exact=True) }}
- {{ pin_subpackage("libarrow-substrait", exact=True) }}
- {{ pin_subpackage("libparquet", exact=True) }}
- clangdev {{ llvm_version }}
- llvmdev {{ llvm_version }}
- cython
- numpy
- python
- setuptools
# see https://github.com/apache/arrow/issues/37931
- setuptools_scm <8
run:
- {{ pin_subpackage("libarrow", exact=True) }}
- {{ pin_compatible('numpy') }}
- python
run_constrained:
- apache-arrow-proc =*={{ build_ext }}

test:
files:
- test_read_parquet.py
imports:
- pyarrow
- pyarrow.dataset
# Compute can be imported but the underlying libarrow_acero is not present.
- pyarrow.compute
- pyarrow.flight
- pyarrow.gandiva
- pyarrow.orc # [unix]
- pyarrow.parquet
- pyarrow.fs
- pyarrow._s3fs
- pyarrow._hdfs
Expand All @@ -713,28 +727,125 @@ outputs:
- pyarrow.cuda # [cuda_compiler_version != "None" and not win]
commands:
# libraries that depend on python (and hence aren't in libarrow itself)
- test -f ${SP_DIR}/pyarrow/libarrow_python.so # [linux]
- test -f ${SP_DIR}/pyarrow/libarrow_python_flight.so # [linux]
- test -f ${SP_DIR}/pyarrow/libarrow_python.dylib # [osx]
- test -f ${SP_DIR}/pyarrow/libarrow_python_flight.dylib # [osx]
- if not exist %SP_DIR%\pyarrow\arrow_python.dll exit 1 # [win]
- if not exist %SP_DIR%\pyarrow\arrow_python_flight.dll exit 1 # [win]

- test -f ${SP_DIR}/pyarrow/include/arrow/python/pyarrow.h # [unix]
- if not exist %SP_DIR%\pyarrow\include\arrow\python\pyarrow.h exit 1 # [win]

- test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix]
- if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win]
- test -f ${SP_DIR}/pyarrow/libarrow_python.so # [linux]
- test -f ${SP_DIR}/pyarrow/libarrow_python_flight.so # [linux]
- test -f ${SP_DIR}/pyarrow/libarrow_python_parquet_encryption.so # [linux]
- test -f ${SP_DIR}/pyarrow/libarrow_python.dylib # [osx]
- test -f ${SP_DIR}/pyarrow/libarrow_python_flight.dylib # [osx]
- test -f ${SP_DIR}/pyarrow/libarrow_python_parquet_encryption.dylib # [osx]
- if not exist %SP_DIR%\pyarrow\arrow_python.dll exit 1 # [win]
- if not exist %SP_DIR%\pyarrow\arrow_python_flight.dll exit 1 # [win]
- if not exist %SP_DIR%\pyarrow\arrow_python_parquet_encryption.dll exit 1 # [win]

- test -f ${SP_DIR}/pyarrow/include/arrow/python/pyarrow.h # [unix]
- if not exist %SP_DIR%\pyarrow\include\arrow\python\pyarrow.h exit 1 # [win]

- test ! -f ${SP_DIR}/pyarrow/tests/test_array.py # [unix]
- if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win]
# Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y"
- if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"]

# Libraries expected to be absent
- test ! -f $PREFIX/lib/libarrow_acero${SHLIB_EXT} # [unix]
- test ! -f $PREFIX/lib/libarrow_dataset${SHLIB_EXT} # [unix]
- test ! -f $PREFIX/lib/libarrow_flight${SHLIB_EXT} # [unix]
- test ! -f $PREFIX/lib/libgandiva${SHLIB_EXT} # [unix]
- test ! -f $PREFIX/lib/libparquet${SHLIB_EXT} # [unix]

about:
home: http://github.com/apache/arrow
license: Apache-2.0
license_file:
- LICENSE.txt
summary: Python libraries for Apache Arrow Core

# Default pyarrow package: pyarrow-core plus the acero, dataset, substrait
# and parquet libraries.
- name: pyarrow
version: {{ version }}
requirements:
host:
# only necessary for run-exports
- python
- numpy
run:
# Default set of libs, listed explicitly because run-exports from
# libarrow-all aren't picked up.
# The default set doesn't contain flight, flight-sql and gandiva.
- {{ pin_subpackage("libarrow", exact=True) }}
- {{ pin_subpackage("libarrow-acero", exact=True) }}
- {{ pin_subpackage("libarrow-dataset", exact=True) }}
- {{ pin_subpackage("libarrow-substrait", exact=True) }}
- {{ pin_subpackage("libparquet", exact=True) }}
- {{ pin_subpackage("pyarrow-core", exact=True) }}
- {{ pin_compatible('numpy') }}
- python
run_constrained:
- apache-arrow-proc =*={{ build_ext }}

test:
files:
- test_read_parquet.py
imports:
# the default pyarrow package contains dataset and parquet
- pyarrow.dataset
- pyarrow.parquet
commands:
# Libraries expected to be present
- test -f $PREFIX/lib/libarrow_acero${SHLIB_EXT} # [unix]
- test -f $PREFIX/lib/libarrow_dataset${SHLIB_EXT} # [unix]
- test -f $PREFIX/lib/libparquet${SHLIB_EXT} # [unix]

# Libraries expected to be absent
- test ! -f $PREFIX/lib/libarrow_flight${SHLIB_EXT} # [unix]
- test ! -f $PREFIX/lib/libgandiva${SHLIB_EXT} # [unix]

# Parquet is included in pyarrow
- python test_read_parquet.py

about:
home: http://github.com/apache/arrow
license: Apache-2.0
license_file:
- LICENSE.txt
summary: Python libraries for Apache Arrow
summary: Python libraries for Apache Arrow with default capabilities

# Full-featured package: the pyarrow package plus the flight, flight-sql
# and gandiva libraries.
- name: pyarrow-all
version: {{ version }}
requirements:
host:
# only necessary for run-exports
- python
- numpy
run:
# full set of libs, listed explicitly because run-exports from
# libarrow-all aren't picked up
- {{ pin_subpackage("libarrow", exact=True) }}
- {{ pin_subpackage("libarrow-acero", exact=True) }}
- {{ pin_subpackage("libarrow-dataset", exact=True) }}
- {{ pin_subpackage("libarrow-flight", exact=True) }}
- {{ pin_subpackage("libarrow-flight-sql", exact=True) }}
- {{ pin_subpackage("libarrow-gandiva", exact=True) }}
- {{ pin_subpackage("libarrow-substrait", exact=True) }}
- {{ pin_subpackage("libparquet", exact=True) }}
- {{ pin_subpackage("pyarrow", exact=True) }}
- {{ pin_compatible('numpy') }}
- python
run_constrained:
- apache-arrow-proc =*={{ build_ext }}

test:
# NOTE(review): test_read_parquet.py is copied in, but no command in this
# test section runs it - confirm whether it is still needed here.
files:
- test_read_parquet.py
imports:
# modules for the flight and gandiva libraries pinned above
- pyarrow.flight
- pyarrow.gandiva
commands:
# Libraries expected to be present
- test -f $PREFIX/lib/libarrow_flight${SHLIB_EXT} # [unix]
- test -f $PREFIX/lib/libgandiva${SHLIB_EXT} # [unix]
about:
home: http://github.com/apache/arrow
license: Apache-2.0
license_file:
- LICENSE.txt
summary: Python libraries for Apache Arrow with all capabilities

- name: pyarrow-tests
script: build-pyarrow.sh # [unix]
Expand All @@ -760,7 +871,7 @@ outputs:
- ninja
host:
- {{ pin_subpackage("libarrow-all", exact=True) }}
- {{ pin_subpackage('pyarrow', exact=True) }}
- {{ pin_subpackage('pyarrow-all', exact=True) }}
- clangdev {{ llvm_version }}
- llvmdev {{ llvm_version }}
- zlib
Expand All @@ -770,7 +881,7 @@ outputs:
- setuptools
- setuptools-scm
run:
- {{ pin_subpackage('pyarrow', exact=True) }}
- {{ pin_subpackage('pyarrow-all', exact=True) }}
- python
run_constrained:
- apache-arrow-proc =*={{ build_ext }}
Expand Down

0 comments on commit a632f02

Please sign in to comment.