Skip to content

Commit

Permalink
[fix](cloud) fix read cache block file return stat NOT_FOUND (apache#…
Browse files Browse the repository at this point in the history
…43220)

Previously, cache blocks could be downloaded as different types than the
target type we intended to read, leading to false cache misses. E.g., a
block might be downloaded as an 'idx' type when the current context
expected a 'ttl' type.

The root cause of this problem is the original design encoding meta info
such as type and expiration time into cache block file path and readers
of this cache block file have inconsistent view of the type so they use
different name to locate file and run into error in the end.

This commit tries other type if the initial type failed to locate the
file (return NOT_FOUND). Be ware that this is a nasty quick fix. We will
elimite the metadate encoded in the file path in the near future to get
rid of all the path related problems.
  • Loading branch information
freemandealer authored Nov 8, 2024
1 parent d3d8a21 commit f0ee17b
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 23 deletions.
2 changes: 0 additions & 2 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1016,8 +1016,6 @@ DEFINE_mInt64(file_cache_ttl_valid_check_interval_second, "0"); // zero for not
// If true, evict the ttl cache using LRU when full.
// Otherwise, only expiration can evict ttl and new data won't add to cache when full.
DEFINE_Bool(enable_ttl_cache_evict_using_lru, "true");
// rename ttl filename to new format during read, with some performance cost
DEFINE_mBool(translate_to_new_ttl_format_during_read, "false");
DEFINE_mBool(enbale_dump_error_file, "true");
// limit the max size of error log on disk
DEFINE_mInt64(file_cache_error_log_limit_bytes, "209715200"); // 200MB
Expand Down
2 changes: 0 additions & 2 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1063,8 +1063,6 @@ DECLARE_mInt64(file_cache_ttl_valid_check_interval_second);
// If true, evict the ttl cache using LRU when full.
// Otherwise, only expiration can evict ttl and new data won't add to cache when full.
DECLARE_Bool(enable_ttl_cache_evict_using_lru);
// rename ttl filename to new format during read, with some performance cost
DECLARE_Bool(translate_to_new_ttl_format_during_read);
DECLARE_mBool(enbale_dump_error_file);
// limit the max size of error log on disk
DECLARE_mInt64(file_cache_error_log_limit_bytes);
Expand Down
2 changes: 2 additions & 0 deletions be/src/io/cache/cached_remote_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, size_t*
file_offset);
}
if (!st || block_state != FileBlock::State::DOWNLOADED) {
LOG(WARNING) << "Read data failed from file cache downloaded by others. err="
<< st.msg() << ", block state=" << block_state;
size_t bytes_read {0};
stats.hit_cache = false;
s3_read_counter << 1;
Expand Down
55 changes: 36 additions & 19 deletions be/src/io/cache/fs_file_cache_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,30 +160,36 @@ Status FSFileCacheStorage::read(const FileCacheKey& key, size_t value_offset, Sl
get_path_in_local_cache(get_path_in_local_cache(key.hash, key.meta.expiration_time),
key.offset, key.meta.type);
Status s = fs->open_file(file, &file_reader);
if (!s.ok()) {
if (!s.is<ErrorCode::NOT_FOUND>() || key.meta.type != FileCacheType::TTL) {
return s;

// handle the case that the file is not found but actually exists in other type format
// TODO(zhengyu): nasty! better eliminate the type encoding in file name in the future
if (!s.ok() && !s.is<ErrorCode::NOT_FOUND>()) {
LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string();
return s; // return other error directly
} else if (!s.ok() && s.is<ErrorCode::NOT_FOUND>()) { // but handle NOT_FOUND error
auto candidates = get_path_in_local_cache_all_candidates(
get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset);
for (auto& candidate : candidates) {
s = fs->open_file(candidate, &file_reader);
if (s.ok()) {
break; // success with one of there candidates
}
}
std::string file_old_format = get_path_in_local_cache_old_ttl_format(
get_path_in_local_cache(key.hash, key.meta.expiration_time), key.offset,
key.meta.type);
if (config::translate_to_new_ttl_format_during_read) {
// try to rename the file with old ttl format to new and retry
VLOG(7) << "try to rename the file with old ttl format to new and retry"
<< " oldformat=" << file_old_format << " original=" << file;
RETURN_IF_ERROR(fs->rename(file_old_format, file));
RETURN_IF_ERROR(fs->open_file(file, &file_reader));
} else {
// try to open the file with old ttl format
VLOG(7) << "try to open the file with old ttl format"
<< " oldformat=" << file_old_format << " original=" << file;
RETURN_IF_ERROR(fs->open_file(file_old_format, &file_reader));
if (!s.ok()) { // still not found, return error
LOG(WARNING) << "open file failed, file=" << file << ", error=" << s.to_string();
return s;
}
}
} // else, s.ok() means open file success

FDCache::instance()->insert_file_reader(fd_key, file_reader);
}
size_t bytes_read = 0;
RETURN_IF_ERROR(file_reader->read_at(value_offset, buffer, &bytes_read));
auto s = file_reader->read_at(value_offset, buffer, &bytes_read);
if (!s.ok()) {
LOG(WARNING) << "read file failed, file=" << file_reader->path()
<< ", error=" << s.to_string();
return s;
}
DCHECK(bytes_read == buffer.get_size());
return Status::OK();
}
Expand Down Expand Up @@ -270,6 +276,17 @@ std::string FSFileCacheStorage::get_path_in_local_cache_old_ttl_format(const std
return Path(dir) / (std::to_string(offset) + BlockFileCache::cache_type_to_string(type));
}

std::vector<std::string> FSFileCacheStorage::get_path_in_local_cache_all_candidates(
const std::string& dir, size_t offset) {
std::vector<std::string> candidates;
std::string base = get_path_in_local_cache(dir, offset, FileCacheType::NORMAL);
candidates.push_back(base);
candidates.push_back(base + "_idx");
candidates.push_back(base + "_ttl");
candidates.push_back(base + "_disposable");
return candidates;
}

std::string FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& value,
uint64_t expiration_time) const {
auto str = value.to_string();
Expand Down
3 changes: 3 additions & 0 deletions be/src/io/cache/fs_file_cache_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ class FSFileCacheStorage : public FileCacheStorage {

void load_cache_info_into_memory(BlockFileCache* _mgr) const;

[[nodiscard]] std::vector<std::string> get_path_in_local_cache_all_candidates(
const std::string& dir, size_t offset);

std::string _cache_base_path;
std::thread _cache_background_load_thread;
const std::shared_ptr<LocalFileSystem>& fs = global_local_filesystem();
Expand Down

0 comments on commit f0ee17b

Please sign in to comment.