From fb5ed0246b56ad1e1929617a39024c91f9f4b367 Mon Sep 17 00:00:00 2001 From: Alexandra Rahlin Date: Tue, 15 Aug 2023 17:40:02 -0500 Subject: [PATCH 1/7] Add frame-level bzip2 compression and decompression (#112) This PR integrates bzip2 support to be on par with gzip support for G3 file I/O. Files with the ".g3.bz2" extension will run through the appropriate bz2 algorithm on read and write. Bzip2 support is optional and automatically detected by cmake, but can be manually overridden using a cmake argument. --- .github/workflows/cmake.yml | 4 +-- README.rst | 3 ++- core/CMakeLists.txt | 16 ++++++++++- core/src/G3MultiFileWriter.cxx | 9 +++++++ core/src/G3Writer.cxx | 9 +++++++ core/src/dataio.cxx | 9 ++++++- core/tests/bz2fileio.py | 49 ++++++++++++++++++++++++++++++++++ 7 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 core/tests/bz2fileio.py diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 2ff7abc3..ecf4e2f2 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -23,11 +23,11 @@ jobs: if [ `echo ${{matrix.os}} | cut -d - -f 1` = "ubuntu" ]; then sudo apt-get --allow-releaseinfo-change update -y [ `echo ${{matrix.os}} | cut -d . -f 1` = "ubuntu-20" ] && sudo apt-get install python-is-python3 - sudo apt-get install libboost-all-dev libflac-dev libnetcdf-dev python python3 python3-pip python3-setuptools + sudo apt-get install libbz2-dev libboost-all-dev libflac-dev libnetcdf-dev python python3 python3-pip python3-setuptools elif [ `echo ${{matrix.os}} | cut -d - -f 1` = "macOS" ]; then brew install python@3.10 brew link --overwrite python@3.10 - brew install boost boost-python3 flac netcdf + brew install bzip2 boost boost-python3 flac netcdf else echo 'No installed package manager!' exit 1 diff --git a/README.rst b/README.rst index aa1e131a..892cc967 100644 --- a/README.rst +++ b/README.rst @@ -79,8 +79,9 @@ This will construct an html version of the documentation. This builds the docum Installation ------------ -For various reasons it may be useful to install the software after building, instead of continuing to use it out of the build directory. Two CMake variables control how the software is installed: +For various reasons it may be useful to install the software after building, instead of continuing to use it out of the build directory. Several CMake variables control how the software is installed: + * ``WITH_BZIP2``, which defaults to ``TRUE``, is used to control whether the core library is built with support for bzip2 compression of G3 files. Use ``-DWITH_BZIP2=FALSE`` when calling ``cmake`` to disable. * ``CMAKE_INSTALL_PREFIX``, which defaults to ``/usr/local`` is used as the root directory for installing all non-python components (header files, cmake export scripts, etc.) * ``PYTHON_MODULE_DIR``, which if not explicitly set defaults to the result of running `distutils.sysconfig.get_python_lib ` with the selected python interpreter, is where the python module will be installed. diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 8efa3cb9..8545847e 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -35,6 +35,17 @@ if(FLAC_FOUND) target_link_libraries(core ${FLAC_LIBRARIES}) endif() +# Link against BZIP2 library +if(NOT DEFINED WITH_BZIP2) + set(WITH_BZIP2 TRUE CACHE BOOL "Enable bzip2 file compression") +endif() +if(WITH_BZIP2) + find_package(BZip2) +endif() +if(BZIP2_FOUND) + target_compile_definitions(core PRIVATE -DBZIP2_FOUND) +endif() + link_python_dir() add_spt3g_program(bin/spt3g-dump) @@ -59,6 +70,9 @@ add_spt3g_test(fileio) add_spt3g_test(multifileio) add_spt3g_test(splitfileio) add_spt3g_test(compressedfileio) +if(BZIP2_FOUND) + add_spt3g_test(bz2fileio) +endif() add_spt3g_test(portability) add_spt3g_test(vecint) add_spt3g_test(ts_bufferprotocol) @@ -84,4 +98,4 @@ add_spt3g_test_program(test SOURCE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/tests/G3TimestreamTest.cxx ${CMAKE_CURRENT_SOURCE_DIR}/tests/G3TimestreamMapTest.cxx - USE_PROJECTS core) \ No newline at end of file + USE_PROJECTS core) diff --git a/core/src/G3MultiFileWriter.cxx b/core/src/G3MultiFileWriter.cxx index 5bce63c7..e8acc4af 100644 --- a/core/src/G3MultiFileWriter.cxx +++ b/core/src/G3MultiFileWriter.cxx @@ -3,7 +3,9 @@ #include #include #include +#ifdef BZIP2_FOUND #include +#endif #include #include #include @@ -141,6 +143,13 @@ G3MultiFileWriter::CheckNewFile(G3FramePtr frame) if (boost::algorithm::ends_with(filename, ".gz")) stream_.push(boost::iostreams::gzip_compressor()); + if (boost::algorithm::ends_with(filename, ".bz2")) { +#ifdef BZIP2_FOUND + stream_.push(boost::iostreams::bzip2_compressor()); +#else + log_fatal("Boost not compiled with bzip2 support."); +#endif + } stream_.push(boost::iostreams::counter64()); stream_.push(boost::iostreams::file_sink(filename, std::ios::binary)); diff --git a/core/src/G3Writer.cxx b/core/src/G3Writer.cxx index 0e327fb7..11eb3420 100644 --- a/core/src/G3Writer.cxx +++ b/core/src/G3Writer.cxx @@ -4,7 +4,9 @@ #include #include #include +#ifdef BZIP2_FOUND #include +#endif #include G3Writer::G3Writer(std::string filename, @@ -20,6 +22,13 @@ G3Writer::G3Writer(std::string filename, if (boost::algorithm::ends_with(filename, ".gz") && !append) stream_.push(boost::iostreams::gzip_compressor()); + if (boost::algorithm::ends_with(filename, ".bz2") && !append) { +#ifdef BZIP2_FOUND + stream_.push(boost::iostreams::bzip2_compressor()); +#else + log_fatal("Boost not compiled with bzip2 support."); +#endif + } std::ios_base::openmode mode = std::ios::binary; if (append) diff --git a/core/src/dataio.cxx b/core/src/dataio.cxx index 1a38f4f6..b909f030 100644 --- a/core/src/dataio.cxx +++ b/core/src/dataio.cxx @@ -4,7 +4,9 @@ #include #include #include +#ifdef BZIP2_FOUND #include +#endif #include #include #include @@ -23,8 +25,13 @@ g3_istream_from_path(boost::iostreams::filtering_istream &stream, stream.reset(); if (boost::algorithm::ends_with(path, ".gz")) stream.push(boost::iostreams::gzip_decompressor()); - if (boost::algorithm::ends_with(path, ".bz2")) + if (boost::algorithm::ends_with(path, ".bz2")) { +#ifdef BZIP2_FOUND stream.push(boost::iostreams::bzip2_decompressor()); +#else + log_fatal("Boost not compiled with bzip2 support."); +#endif + } int fd = -1; diff --git a/core/tests/bz2fileio.py b/core/tests/bz2fileio.py new file mode 100644 index 00000000..e89fc6c6 --- /dev/null +++ b/core/tests/bz2fileio.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +from spt3g import core +import time + +# File to disk +pipe = core.G3Pipeline() +pipe.Add(core.G3InfiniteSource, type=core.G3FrameType.Timepoint, n=10) +n = 0 + + +def addinfo(fr): + global n + if fr.type != core.G3FrameType.Timepoint: + return + fr["time"] = core.G3Time(int(time.time() * core.G3Units.s)) + fr["count"] = n + n += 1 + + +pipe.Add(addinfo) +pipe.Add(core.Dump) +pipe.Add(core.G3Writer, filename="test.g3.bz2") +pipe.Run() + +# And back from disk +print("Reading") +pipe = core.G3Pipeline() +pipe.Add(core.G3Reader, filename="test.g3.bz2") +pipe.Add(core.Dump) +n = 0 + + +def checkinfo(fr): + global n + if fr.type != core.G3FrameType.Timepoint: + return + if "time" not in fr: + raise KeyError("time") + if fr["count"] != n: + raise ValueError("Out of order frame") + n += 1 + + +pipe.Add(checkinfo) +pipe.Run() + +if n != 10: + raise ValueError("Wrong number of frames (%d should be %d)" % (n, 10)) From f37bb5fa96b44f3a74fbd9323f907bf25e284239 Mon Sep 17 00:00:00 2001 From: Sasha Rahlin Date: Wed, 16 Aug 2023 10:09:46 -0500 Subject: [PATCH 2/7] Ensure NetCDF headers are included --- dfmux/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dfmux/CMakeLists.txt b/dfmux/CMakeLists.txt index b6982523..c4d9d1d9 100644 --- a/dfmux/CMakeLists.txt +++ b/dfmux/CMakeLists.txt @@ -14,6 +14,9 @@ add_spt3g_library(dfmux SHARED ${DFMUX_LIB_EXTRA_SRC} ) target_link_libraries(dfmux core ${DFMUX_LIB_EXTRA_LIB}) +if (NETCDF_FOUND) + target_include_directories(dfmux PRIVATE ${NETCDF_INCLUDES}) +endif() if (NETCDF_FOUND) add_spt3g_program(bin/ledgerman.py ledgerman) From 4302420af46fd0334c0d61f1e0974ab0404b7e1f Mon Sep 17 00:00:00 2001 From: Sasha Rahlin Date: Wed, 16 Aug 2023 10:10:12 -0500 Subject: [PATCH 3/7] Allow overriding CMAKE_FIND_PACKAGE_PREFER_CONFIG option The default behavior (TRUE) remains unchanged. --- cmake/Spt3gBoostPython.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/Spt3gBoostPython.cmake b/cmake/Spt3gBoostPython.cmake index de520a26..8cc1bcd9 100644 --- a/cmake/Spt3gBoostPython.cmake +++ b/cmake/Spt3gBoostPython.cmake @@ -45,7 +45,9 @@ else() endif() # suppress configuration warnings in newer cmake / boost versions -set(CMAKE_FIND_PACKAGE_PREFER_CONFIG TRUE) +if(NOT DEFINED CMAKE_FIND_PACKAGE_PREFER_CONFIG) + set(CMAKE_FIND_PACKAGE_PREFER_CONFIG TRUE) +endif() if(NOT DEFINED Boost_PYTHON_TYPE) set(Boost_PYTHON_TYPE python) From 5dd50a7834bac2cf3825216295aaad52068d44a6 Mon Sep 17 00:00:00 2001 From: Sasha Rahlin Date: Wed, 16 Aug 2023 10:19:11 -0500 Subject: [PATCH 4/7] Use CMAKE_COMMAND variable to make sure the correct executable is run --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e4328cf7..4067b1de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,7 +168,7 @@ endif(${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.17) # Target for version string add_custom_target(version ALL COMMAND sh ${CMAKE_SOURCE_DIR}/cmake/getvers.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}/spt3g/version.py - COMMAND cmake -P ${CMAKE_SOURCE_DIR}/cmake/Spt3gVersion.cmake ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/Spt3gVersion.cmake ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} BYPRODUCTS ${CMAKE_BINARY_DIR}/spt3g/version.py ${CMAKE_BINARY_DIR}/cmake/Spt3gConfigVersion.cmake COMMENT "Regenerating VCS version information" ) @@ -184,7 +184,7 @@ add_custom_target(tarball # Add target to generate documentation add_custom_target(docs - COMMAND cmake -P ${CMAKE_SOURCE_DIR}/cmake/gen_per_module_docs.cmake ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} ${Python_EXECUTABLE} + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/gen_per_module_docs.cmake ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} ${Python_EXECUTABLE} COMMAND mkdir -p ${CMAKE_BINARY_DIR}/docs COMMAND which sphinx-build && sphinx-build -b html ${CMAKE_SOURCE_DIR}/doc ${CMAKE_BINARY_DIR}/docs || echo "Error: sphinx-build not found. Could not generate HTML docs." COMMENT "Generate HTML documentation") From a64c03e96f28fb9d9e6195d5e07b11172b1a9cb9 Mon Sep 17 00:00:00 2001 From: Alexandra Rahlin Date: Wed, 16 Aug 2023 10:26:03 -0500 Subject: [PATCH 5/7] Print core logging messages with timestamps by default (#111) Timestamps on core logging messages are very useful for rudimentary profiling, especially for long running scripts. While the profile option for G3Pipeline.Run() provides similar functionality, it's only useful for scripts that are entirely pipeline-based, which is often not the case. Moreover, timestamps on log messages make it easy to track the progress of a script in real-time, rather than waiting until it has finished running to find out how long it took. Closes #50. --- core/src/G3PrintfLogger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/G3PrintfLogger.cxx b/core/src/G3PrintfLogger.cxx index e523f82e..9224f510 100644 --- a/core/src/G3PrintfLogger.cxx +++ b/core/src/G3PrintfLogger.cxx @@ -4,7 +4,7 @@ #include G3PrintfLogger::G3PrintfLogger(G3LogLevel level) - : G3Logger(level), TrimFileNames(true), Timestamps(false) + : G3Logger(level), TrimFileNames(true), Timestamps(true) { tty_ = isatty(STDERR_FILENO); } From 6dd855dddcca1b887d058ac18eaf9a68bebea1a7 Mon Sep 17 00:00:00 2001 From: Marius Millea Date: Wed, 16 Aug 2023 12:32:32 -0700 Subject: [PATCH 6/7] Partial installation with poetry/pip (#97) add pypoetry.toml to allow poetry/pip editable install --------- Co-authored-by: Alexandra Rahlin --- CMakeLists.txt | 4 ++-- cmake/getvers.sh | 20 +++++++++++++++----- cmake/pyproject.toml.in | 17 +++++++++++++++++ doc/quickstart.rst | 4 +++- 4 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 cmake/pyproject.toml.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 4067b1de..edafb052 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,9 +167,9 @@ endif(${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.17) # Target for version string add_custom_target(version ALL - COMMAND sh ${CMAKE_SOURCE_DIR}/cmake/getvers.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}/spt3g/version.py + COMMAND sh ${CMAKE_SOURCE_DIR}/cmake/getvers.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/cmake/Spt3gVersion.cmake ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} - BYPRODUCTS ${CMAKE_BINARY_DIR}/spt3g/version.py ${CMAKE_BINARY_DIR}/cmake/Spt3gConfigVersion.cmake + BYPRODUCTS ${CMAKE_BINARY_DIR}/spt3g/version.py ${CMAKE_BINARY_DIR}/pyproject.toml ${CMAKE_BINARY_DIR}/cmake/Spt3gConfigVersion.cmake COMMENT "Regenerating VCS version information" ) diff --git a/cmake/getvers.sh b/cmake/getvers.sh index e161caad..937f4214 100755 --- a/cmake/getvers.sh +++ b/cmake/getvers.sh @@ -1,12 +1,22 @@ #!/bin/sh -# Usage: getvers.sh +# Usage: getvers.sh set -e +cd $1 -exec 1>$2 +# PEP440-compliant version number for pyproject.toml +if [ -d .git ]; then + # replaces first - with .dev and second - with +, so e.g. 0.3-154-gd36baf4a becomes 0.3.dev154+gd36baf4a + fullversion_pep440=$(echo $(git describe --always --tags 2>/dev/null) | sed 's/-/.dev/' | sed 's/-/+/') +fi +fullversion_pep440="${fullversion_pep440:-0.1.0+unknown}" # fallback for SVN or error above +sed "s/\\\$Version\\\$/$fullversion_pep440/" $1/cmake/pyproject.toml.in > $2/pyproject.toml + + +# version.py version info +exec 1>"$2/spt3g/version.py" -cd $1 echo '# AUTO-GENERATED FILE: DO NOT EDIT' echo @@ -119,14 +129,14 @@ elif [ -d .git ]; then echo localdiffs=False fi echo versionname=\"$(git tag -l --points-at HEAD 2>/dev/null)\" - echo fullversion=\"$(git describe --always --tags --dirty 2>/dev/null)\" + echo fullversion=\"$fullversion_pep440\" else echo upstream_url=\"UNKNOWN VCS\" echo upstream_branch=\"UNKNOWN VCS\" echo revision=\"UNKNOWN VCS\" echo gitrevision=\"UNKNOWN\" echo versionname=\"UNKNOWN\" - if [ "$(cat VERSION)" == "\$Version\$" ]; then + if [ "$(cat VERSION)" = '$Version$' ]; then echo localdiffs=True echo fullversion=\"UNKNOWN\" else diff --git a/cmake/pyproject.toml.in b/cmake/pyproject.toml.in new file mode 100644 index 00000000..b9268b9e --- /dev/null +++ b/cmake/pyproject.toml.in @@ -0,0 +1,17 @@ +[tool.poetry] +name = "spt3g" +version = "$Version$" +description = "SPT3G Analysis and DAQ Software" +authors = ["SPT Collaboration"] + +[tool.poetry.dependencies] +python = "^3.7" +numpy = "^1.15" +astropy = "^5" +scipy = "^1.4.1" +pandas = "^1" +healpy = "^1.13" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/doc/quickstart.rst b/doc/quickstart.rst index 8fc464a4..13a1c07e 100644 --- a/doc/quickstart.rst +++ b/doc/quickstart.rst @@ -83,12 +83,14 @@ Passing ``-jN`` to ``make``, where N is the number of cores you wish to use duri By default, this will use the system's standard Python installation (whatever you get if you just run ``python``). If you want a different python, you can specify that python through passing the argument ``-DPYTHON_EXECUTABLE=`` to cmake. For example, to use Python 3 if Python 3 is not the default, replace the cmake command above with ``cmake -DPYTHON_EXECUTABLE=`which python3```. Note that, if you do this, make *sure* that a Boost library built for the version of Python you are using exists -- generally, installing everything from the system package manager will ensure this. -Once that is complete, you can use the ``env-shell.sh`` script in the build directory to set up the appropriate environment variables (PYTHONPATH, etc.): +Once that is complete, you can either use the ``env-shell.sh`` script in the build directory to set up the appropriate environment variables (PYTHONPATH, etc.): .. code-block:: sh ./env-shell.sh +or you can ``pip install -e /path/to/spt3g_software/build`` (needs pip>=22) or ``poetry add -e /path/to/spt3g_software/build`` to partially install spt3g_software and all of its Python dependencies in "editable mode" into whatever Python environment you'd like. In either case 1) the ``build`` directory should be kept around, since the installation will read directly from this folder, and 2) the version of Python in the environment you are using should be the same as the one used to build spt3g_software. Note that this method does not install the headers and compiled binaries for use by downstream projects; the installation instructions below provide a more complete method for this. + Installation ============ From 0c5986b068c6c2ec8423baf5ab147211b6527b15 Mon Sep 17 00:00:00 2001 From: Sasha Rahlin Date: Mon, 28 Aug 2023 09:42:09 -0500 Subject: [PATCH 7/7] Don't raise an error if the cached key is null --- core/python/g3decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/python/g3decorators.py b/core/python/g3decorators.py index 41cd4208..80126966 100644 --- a/core/python/g3decorators.py +++ b/core/python/g3decorators.py @@ -89,7 +89,7 @@ def __init__(self, *args, **kwargs): def __call__(self, frame): for vname, stored_key in self.argument_map.items(): - if stored_key in frame: + if stored_key and stored_key in frame: self.kwargs[vname] = frame[stored_key] if self_outer.type is None or frame.type == self_outer.type: return f(frame, *(self.args), **(self.kwargs))