diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h index 9335933286ec24c..c17863cc56838f5 100644 --- a/be/src/util/mem_info.h +++ b/be/src/util/mem_info.h @@ -144,8 +144,11 @@ class MemInfo { if (config::enable_je_purge_dirty_pages) { try { // Purge all unused dirty pages for arena , or for all arenas if equals MALLCTL_ARENAS_ALL. - jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, + int err = jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, nullptr, nullptr, 0); + if (err) { + LOG(WARNING) << "Jemalloc purge all unused dirty pages failed"; + } } catch (...) { LOG(WARNING) << "Purge all unused dirty pages for all arenas failed"; } @@ -153,6 +156,22 @@ class MemInfo { #endif } + // The limit of `tcache` is the number of pages, not the total number of page bytes. + // `tcache` has two cleaning opportunities: 1. when the number of memory allocations and releases reaches a certain count, + // pages that have not been used for a long time are recycled; 2. all of `tcache` is recycled when the thread exits. + // Here we add a total size limit. 
+ static inline void je_thread_tcache_flush() { +#ifdef USE_JEMALLOC + constexpr size_t TCACHE_LIMIT = (1ULL << 30); // 2^30 (1G): threshold above which the calling thread's tcache is flushed + if (allocator_cache_mem() - je_dirty_pages_mem() > TCACHE_LIMIT) { // NOTE(review): compared against a size_t, so a negative/wrapped difference would pass this check - confirm the getters' types and that dirty pages never exceed cache mem + int err = jemallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0); + if (err) { + LOG(WARNING) << "Jemalloc thread.tcache.flush failed"; + } + } +#endif + } + static std::mutex je_purge_dirty_pages_lock; static std::condition_variable je_purge_dirty_pages_cv; static std::atomic je_purge_dirty_pages_notify; diff --git a/be/src/vec/common/allocator.cpp b/be/src/vec/common/allocator.cpp index 2b1c05533cd5049..a20882e3d05ad79 100644 --- a/be/src/vec/common/allocator.cpp +++ b/be/src/vec/common/allocator.cpp @@ -111,6 +111,8 @@ void Allocator::sys_memory_check(size_t print_id(doris::thread_context()->task_id()), doris::thread_context()->get_thread_id(), doris::config::thread_wait_gc_max_milliseconds, err_msg); + // Reached only when a query thread exceeds the memory limit for the first time and wait_gc is true. + doris::MemInfo::je_thread_tcache_flush(); while (wait_milliseconds < doris::config::thread_wait_gc_max_milliseconds) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); if (!doris::GlobalMemoryArbitrator::is_exceed_hard_mem_limit(size)) { diff --git a/bin/run-fs-benchmark.sh b/bin/run-fs-benchmark.sh index 9908cc6c4d1da58..f4edd4117d01e8a 100755 --- a/bin/run-fs-benchmark.sh +++ b/bin/run-fs-benchmark.sh @@ -280,7 +280,7 @@ export LIBHDFS_OPTS="${final_java_opt}" #echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}" # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile -export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" +export 
JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" export AWS_EC2_METADATA_DISABLED=true export AWS_MAX_ATTEMPTS=2 diff --git a/bin/start_be.sh b/bin/start_be.sh index c2a4e5c8f9c828e..06a784bf2341a6a 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -403,7 +403,7 @@ export LIBHDFS_OPTS="${final_java_opt}" # log "LIBHDFS_OPTS: ${LIBHDFS_OPTS}" if [[ -z ${JEMALLOC_CONF} ]]; then - JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" + JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" fi if [[ -z ${JEMALLOC_PROF_PRFIX} ]]; then diff --git a/cloud/script/start.sh b/cloud/script/start.sh index e119e193d823f69..37d9abcb564e5a2 100644 --- a/cloud/script/start.sh +++ b/cloud/script/start.sh @@ -102,7 +102,7 @@ fi echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}" -export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:true,prof_prefix:jeprof.out" +export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" mkdir -p "${DORIS_HOME}/log" echo "starts ${process} with args: $*" diff --git a/conf/be.conf b/conf/be.conf index 09b4d59daee3f1c..e3d952d9ffdfe7a 100644 --- a/conf/be.conf +++ b/conf/be.conf @@ -31,8 +31,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$LOG_DIR/jni.log -Xlog:gc*:$LOG_DIR/be # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # 
https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # ports for admin, web, heartbeat service be_port = 9060 diff --git a/regression-test/pipeline/external/conf/be.conf b/regression-test/pipeline/external/conf/be.conf index 94a038cfa885fb2..306459336222c61 100644 --- a/regression-test/pipeline/external/conf/be.conf +++ b/regression-test/pipeline/external/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index a072ac7ad50aca4..e72709603d77cd4 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # 
https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf index 675518ac0ce464a..466f0d589614270 100644 --- a/regression-test/pipeline/p1/conf/be.conf +++ b/regression-test/pipeline/p1/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO