Skip to content

Commit

Permalink
Fix array-record build.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 700840048
  • Loading branch information
iindyk authored and copybara-github committed Dec 3, 2024
1 parent 363d798 commit c396f83
Show file tree
Hide file tree
Showing 16 changed files with 202 additions and 179 deletions.
84 changes: 50 additions & 34 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,92 +6,101 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
# Abseil LTS 20230802.1 (previously 20230125.0)
http_archive(
name = "com_google_absl",
sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-20230125.0",
sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed",
strip_prefix = "abseil-cpp-20230802.1",
urls = [
"https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz",
],
)

# Version: pypi-v0.11.0, 2020/10/27
git_repository(
name = "com_google_absl_py",
remote = "https://github.com/abseil/abseil-py",
commit = "127c98870edf5f03395ce9cf886266fa5f24455e",
remote = "https://github.com/abseil/abseil-py",
shallow_since = "1673401277 -0800",
)

# Needed by com_google_riegeli
http_archive(
name = "org_brotli",
sha256 = "84a9a68ada813a59db94d83ea10c54155f1d34399baf377842ff3ab9b3b3256e",
strip_prefix = "brotli-3914999fcc1fda92e750ef9190aa6db9bf7bdb07",
urls = ["https://github.com/google/brotli/archive/3914999fcc1fda92e750ef9190aa6db9bf7bdb07.zip"], # 2022-11-17
)

# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/main.zip"],
strip_prefix = "googletest-main",
name = "com_google_googletest",
sha256 = "24e06e79a78ca5794ec6ad2bf0a1f05515cd1d05a9e10d9a6dc853078b2f3914",
strip_prefix = "googletest-main",
urls = ["https://github.com/google/googletest/archive/main.zip"],
)

# V3.4.0, 20210818
http_archive(
name = "eigen3",
sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626",
strip_prefix = "eigen-3.4.0",
urls = [
"https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2",
],
build_file_content =
"""
name = "eigen3",
build_file_content =
"""
cc_library(
name = 'eigen3',
srcs = [],
includes = ['.'],
hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']),
visibility = ['//visibility:public'],
)
"""
""",
sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626",
strip_prefix = "eigen-3.4.0",
urls = [
"https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2",
],
)

# `pybind11_bazel` (https://github.com/pybind/pybind11_bazel): 20230130
http_archive(
name = "pybind11_bazel",
strip_prefix = "pybind11_bazel-5f458fa53870223a0de7eeb60480dd278b442698",
sha256 = "b35f3abc3d52ee5c753fdeeb2b5129b99e796558754ca5d245e28e51c1072a21",
urls = ["https://github.com/pybind/pybind11_bazel/archive/5f458fa53870223a0de7eeb60480dd278b442698.tar.gz"],
name = "pybind11_bazel",
sha256 = "b35f3abc3d52ee5c753fdeeb2b5129b99e796558754ca5d245e28e51c1072a21",
strip_prefix = "pybind11_bazel-5f458fa53870223a0de7eeb60480dd278b442698",
urls = ["https://github.com/pybind/pybind11_bazel/archive/5f458fa53870223a0de7eeb60480dd278b442698.tar.gz"],
)

# V2.10.3, 20230130
http_archive(
name = "pybind11",
build_file = "@pybind11_bazel//:pybind11.BUILD",
strip_prefix = "pybind11-2.10.3",
sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"],
name = "pybind11",
build_file = "@pybind11_bazel//:pybind11.BUILD",
sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
strip_prefix = "pybind11-2.10.3",
urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"],
)

load("@pybind11_bazel//:python_configure.bzl", "python_configure")

python_configure(name = "local_config_python")

# V23.1 (previously V21.12, 20230130)
# proto_library, cc_proto_library, and java_proto_library rules implicitly
# depend on @com_google_protobuf for protoc and proto runtimes.
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
sha256 = "22fdaf641b31655d4b2297f9981fa5203b2866f8332d3c6333f6b0107bb320de",
strip_prefix = "protobuf-21.12",
urls = ["https://github.com/protocolbuffers/protobuf/archive/v21.12.tar.gz"],
sha256 = "dc167b7d23ec0d6e4a3d4eae1798de6c8d162e69fa136d39753aaeb7a6e1289d",
strip_prefix = "protobuf-23.1",
urls = ["https://github.com/protocolbuffers/protobuf/archive/v23.1.tar.gz"],
)

load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")

protobuf_deps()

# Riegeli does not cut releases, so we pin a specific commit (previously this referenced the head of master)
http_archive(
name = "com_google_riegeli",
strip_prefix = "riegeli-master",
sha256 = "5615438b3809fdd62266030e2c6f19c457a15bfb6ef3aa8132503e8584305f8a",
strip_prefix = "riegeli-254e6d74ee0d325676739fe5075e5a1a895624cf",
urls = [
"https://github.com/google/riegeli/archive/master.zip",
"https://github.com/google/riegeli/archive/254e6d74ee0d325676739fe5075e5a1a895624cf.tar.gz",
],
)

# Riegeli's dependencies
http_archive(
name = "net_zstd",
Expand All @@ -100,34 +109,39 @@ http_archive(
strip_prefix = "zstd-1.4.5/lib",
urls = ["https://github.com/facebook/zstd/archive/v1.4.5.zip"], # 2020-05-22
)

http_archive(
name = "lz4",
build_file = "@com_google_riegeli//third_party:lz4.BUILD",
sha256 = "4ec935d99aa4950eadfefbd49c9fad863185ac24c32001162c44a683ef61b580",
strip_prefix = "lz4-1.9.3/lib",
urls = ["https://github.com/lz4/lz4/archive/refs/tags/v1.9.3.zip"], # 2020-11-16
)

http_archive(
name = "snappy",
build_file = "@com_google_riegeli//third_party:snappy.BUILD",
sha256 = "7ee7540b23ae04df961af24309a55484e7016106e979f83323536a1322cedf1b",
strip_prefix = "snappy-1.2.0",
urls = ["https://github.com/google/snappy/archive/1.2.0.zip"], # 2024-04-04
)

http_archive(
name = "crc32c",
build_file = "@com_google_riegeli//third_party:crc32.BUILD",
sha256 = "338f1d9d95753dc3cdd882dfb6e176bbb4b18353c29c411ebcb7b890f361722e",
strip_prefix = "crc32c-1.1.0",
urls = ["https://github.com/google/crc32c/archive/1.1.0.zip"], # 2019-05-24
)

http_archive(
name = "zlib",
build_file = "@com_google_riegeli//third_party:zlib.BUILD",
sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
strip_prefix = "zlib-1.2.11",
urls = ["http://zlib.net/fossils/zlib-1.2.11.tar.gz"], # 2017-01-15
)

http_archive(
name = "highwayhash",
build_file = "@com_google_riegeli//third_party:highwayhash.BUILD",
Expand All @@ -139,14 +153,16 @@ http_archive(
# Tensorflow, 20230705
http_archive(
name = "org_tensorflow",
strip_prefix = "tensorflow-2.12.1",
sha256 = "63025cb60d00d9aa7a88807651305a38abb9bb144464e2419c03f13a089d19a6",
strip_prefix = "tensorflow-2.12.1",
urls = ["https://github.com/tensorflow/tensorflow/archive/v2.12.1.zip"],
)

load("@org_tensorflow//tensorflow/tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") # buildifier: disable=load-on-top

# This import (along with the org_tensorflow archive) is necessary to provide the devtoolset-9 toolchain
load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") # buildifier: disable=load-on-top
load("@org_tensorflow//tensorflow/tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") # buildifier: disable=load-on-top

initialize_rbe_configs()

aarch64_compiler_configure()
15 changes: 6 additions & 9 deletions cpp/BUILD
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ArrayRecord is a new file format for IO intensive applications.
# It supports efficient random access and various compression algorithms.

load("//third_party/protobuf/bazel:proto_library.bzl", "proto_library")
load("@rules_proto//proto:defs.bzl", "proto_library")

package(default_visibility = ["//visibility:public"])

Expand All @@ -17,11 +17,6 @@ cc_proto_library(
deps = [":layout_proto"],
)

go_proto_library(
name = "layout_go_proto",
deps = [":layout_proto"],
)

cc_library(
name = "common",
hdrs = ["common.h"],
Expand Down Expand Up @@ -120,13 +115,13 @@ cc_library(
":sequenced_chunk_writer",
":shareable_dependency",
":thread_pool",
"//third_party/protobuf:protobuf_lite",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/types:span",
"@com_google_protobuf//:protobuf_lite",
"@com_google_riegeli//riegeli/base:initializer",
"@com_google_riegeli//riegeli/base:object",
"@com_google_riegeli//riegeli/base:options_parser",
Expand All @@ -152,8 +147,10 @@ cc_library(
deps = [
":common",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/time",
"@com_google_absl//absl/types:optional",
"@com_google_riegeli//riegeli/base:object",
"@com_google_riegeli//riegeli/base:shared_buffer",
"@com_google_riegeli//riegeli/base:status",
"@com_google_riegeli//riegeli/base:types",
"@com_google_riegeli//riegeli/bytes:reader",
Expand All @@ -171,7 +168,6 @@ cc_library(
":parallel_for",
":shareable_dependency",
":thread_pool",
"//third_party/protobuf:protobuf_lite",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/functional:any_invocable",
"@com_google_absl//absl/functional:function_ref",
Expand All @@ -180,6 +176,7 @@ cc_library(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"@com_google_protobuf//:protobuf_lite",
"@com_google_riegeli//riegeli/base:initializer",
"@com_google_riegeli//riegeli/base:object",
"@com_google_riegeli//riegeli/base:options_parser",
Expand Down
36 changes: 17 additions & 19 deletions cpp/array_record_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,9 @@ limitations under the License.
#include "cpp/masked_reader.h"
#include "cpp/parallel_for.h"
#include "cpp/thread_pool.h"
#include "third_party/protobuf/message_lite.h"
#include "riegeli/base/maker.h"
#include "google/protobuf/message_lite.h"
#include "riegeli/base/object.h"
#include "riegeli/base/options_parser.h"
#include "riegeli/base/shared_ptr.h"
#include "riegeli/base/status.h"
#include "riegeli/bytes/reader.h"
#include "riegeli/chunk_encoding/chunk.h"
Expand Down Expand Up @@ -170,12 +168,12 @@ ChunkDecoder ReadChunk(Reader& reader, size_t pos, size_t len) {
decoder.Fail(reader.status());
return decoder;
}
MaskedReader masked_reader(*reader.NewReader(pos), len);
MaskedReader masked_reader(reader.NewReader(pos), len);
if (!masked_reader.ok()) {
decoder.Fail(masked_reader.status());
return decoder;
}
riegeli::DefaultChunkReader chunk_reader(&masked_reader);
auto chunk_reader = riegeli::DefaultChunkReader<>(&masked_reader);
Chunk chunk;
if (!chunk_reader.ReadChunk(chunk)) {
decoder.Fail(chunk_reader.status());
Expand Down Expand Up @@ -345,15 +343,15 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecords(
MaskedReader masked_reader(riegeli::kClosed);
{
AR_ENDO_SCOPE("MaskedReader");
masked_reader.Reset(
*reader->NewReader(state_->chunk_offsets[chunk_idx_start]),
masked_reader = MaskedReader(
reader->NewReader(state_->chunk_offsets[chunk_idx_start]),
buf_len);
}
for (uint64_t chunk_idx = chunk_idx_start; chunk_idx <= last_chunk_idx;
++chunk_idx) {
AR_ENDO_SCOPE("ChunkReader+ChunkDecoder");
masked_reader.Seek(state_->chunk_offsets[chunk_idx]);
riegeli::DefaultChunkReader chunk_reader(&masked_reader);
riegeli::DefaultChunkReader<> chunk_reader(&masked_reader);
Chunk chunk;
if (ABSL_PREDICT_FALSE(!chunk_reader.ReadChunk(chunk))) {
return chunk_reader.status();
Expand Down Expand Up @@ -422,15 +420,15 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecordsInRange(
MaskedReader masked_reader(riegeli::kClosed);
{
AR_ENDO_SCOPE("MaskedReader");
masked_reader.Reset(
*reader->NewReader(state_->chunk_offsets[chunk_idx_start]),
masked_reader = MaskedReader(
reader->NewReader(state_->chunk_offsets[chunk_idx_start]),
buf_len);
}
for (uint64_t chunk_idx = chunk_idx_start; chunk_idx <= last_chunk_idx;
++chunk_idx) {
AR_ENDO_SCOPE("ChunkReader+ChunkDecoder");
masked_reader.Seek(state_->chunk_offsets[chunk_idx]);
riegeli::DefaultChunkReader chunk_reader(&masked_reader);
riegeli::DefaultChunkReader<> chunk_reader(&masked_reader);
Chunk chunk;
if (ABSL_PREDICT_FALSE(!chunk_reader.ReadChunk(chunk))) {
return chunk_reader.status();
Expand Down Expand Up @@ -541,14 +539,14 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecordsWithIndices(
MaskedReader masked_reader(riegeli::kClosed);
{
AR_ENDO_SCOPE("MaskedReader");
masked_reader.Reset(
*reader->NewReader(state_->chunk_offsets[buffer_chunks[0]]),
masked_reader = MaskedReader(
reader->NewReader(state_->chunk_offsets[buffer_chunks[0]]),
buf_len);
}
for (auto chunk_idx : buffer_chunks) {
AR_ENDO_SCOPE("ChunkReader+ChunkDecoder");
masked_reader.Seek(state_->chunk_offsets[chunk_idx]);
riegeli::DefaultChunkReader chunk_reader(&masked_reader);
riegeli::DefaultChunkReader<> chunk_reader(&masked_reader);
Chunk chunk;
if (ABSL_PREDICT_FALSE(!chunk_reader.ReadChunk(chunk))) {
return chunk_reader.status();
Expand Down Expand Up @@ -688,8 +686,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) {
// movable, OSS ThreadPool only takes std::function which requires all the
// captures to be copyable. Therefore we must wrap the promise in a
// shared_ptr to copy it over to the scheduled task.
riegeli::SharedPtr decoder_promise(
riegeli::Maker<std::promise<std::vector<ChunkDecoder>>>());
auto decoder_promise =
std::make_shared<std::promise<std::vector<ChunkDecoder>>>();
state_->future_decoders.push(
{buffer_to_add, decoder_promise->get_future()});
const auto reader = get_backing_reader();
Expand Down Expand Up @@ -721,8 +719,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) {
MaskedReader masked_reader(riegeli::kClosed);
{
AR_ENDO_SCOPE("MaskedReader");
masked_reader.Reset(*reader->NewReader(chunk_offsets.front()),
buffer_len);
masked_reader =
MaskedReader(reader->NewReader(chunk_offsets.front()), buffer_len);
}
if (!masked_reader.ok()) {
for (auto& decoder : decoders) {
Expand All @@ -735,7 +733,7 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) {
AR_ENDO_SCOPE("ChunkReader+ChunkDecoder");
for (auto local_chunk_idx : IndicesOf(chunk_offsets)) {
masked_reader.Seek(chunk_offsets[local_chunk_idx]);
riegeli::DefaultChunkReader chunk_reader(&masked_reader);
auto chunk_reader = riegeli::DefaultChunkReader<>(&masked_reader);
Chunk chunk;
if (!chunk_reader.ReadChunk(chunk)) {
decoders[local_chunk_idx].Fail(chunk_reader.status());
Expand Down
5 changes: 3 additions & 2 deletions cpp/array_record_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ limitations under the License.
#include "cpp/common.h"
#include "cpp/shareable_dependency.h"
#include "cpp/thread_pool.h"
#include "third_party/protobuf/message_lite.h"
#include "google/protobuf/message_lite.h"
#include "riegeli/base/initializer.h"
#include "riegeli/base/object.h"
#include "riegeli/bytes/reader.h"
Expand Down Expand Up @@ -378,7 +378,8 @@ template <typename Src>
explicit ArrayRecordReader(
Src&& src,
ArrayRecordReaderBase::Options options = ArrayRecordReaderBase::Options(),
ARThreadPool* pool = nullptr) -> ArrayRecordReader<riegeli::TargetT<Src>>;
ARThreadPool* pool = nullptr) ->
ArrayRecordReader<riegeli::InitializerTargetT<Src>>;

} // namespace array_record

Expand Down
Loading

0 comments on commit c396f83

Please sign in to comment.