Skip to content

Commit

Permalink
issue-1146: add monitoring for in-memory state cache sizes and exhaus…
Browse files Browse the repository at this point in the history
…tiveness (#2418)
  • Loading branch information
debnatkh authored Nov 4, 2024
1 parent 0b1de0c commit f0e639f
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 8 deletions.
8 changes: 7 additions & 1 deletion cloud/filestore/libs/storage/tablet/tablet_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ class TIndexTabletActor final
// Read-write transactions
std::atomic<i64> InMemoryIndexStateRWCount{0};

// In-memory index state cache: per-table entry counts and the node refs
// exhaustiveness flag (stored as 0/1). Explicitly zero-initialized like the
// neighbouring counters: before C++20 a default-constructed std::atomic holds
// an indeterminate value, which could be exported before the first Update().
std::atomic<i64> InMemoryIndexStateNodesCount{0};
std::atomic<i64> InMemoryIndexStateNodeRefsCount{0};
std::atomic<i64> InMemoryIndexStateNodeAttrsCount{0};
std::atomic<i64> InMemoryIndexStateIsExhaustive{0};

// Data stats
std::atomic<i64> FreshBytesCount{0};
std::atomic<i64> DeletedFreshBytesCount{0};
Expand Down Expand Up @@ -221,7 +226,8 @@ class TIndexTabletActor final
const TReadAheadCacheStats& readAheadStats,
const TNodeIndexCacheStats& nodeIndexCacheStats,
const TNodeToSessionCounters& nodeToSessionCounters,
const TMiscNodeStats& miscNodeStats);
const TMiscNodeStats& miscNodeStats,
const TInMemoryIndexStateStats& inMemoryIndexStateStats);
} Metrics;

const IProfileLogPtr ProfileLog;
Expand Down
26 changes: 23 additions & 3 deletions cloud/filestore/libs/storage/tablet/tablet_actor_counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,18 @@ void TIndexTabletActor::TMetrics::Register(
REGISTER_AGGREGATABLE_SUM(
InMemoryIndexStateRWCount,
EMetricType::MT_DERIVATIVE);
// In-memory index state cache metrics, all registered as absolute values:
// three size counters followed by the exhaustiveness flag.
REGISTER_AGGREGATABLE_SUM(
InMemoryIndexStateNodesCount,
EMetricType::MT_ABSOLUTE);
REGISTER_AGGREGATABLE_SUM(
InMemoryIndexStateNodeRefsCount,
EMetricType::MT_ABSOLUTE);
REGISTER_AGGREGATABLE_SUM(
InMemoryIndexStateNodeAttrsCount,
EMetricType::MT_ABSOLUTE);
// NOTE(review): this counter holds a 0/1 flag; as an aggregatable sum the
// aggregated value presumably counts the tablets whose cache is exhaustive
// - confirm this is the intended reading.
REGISTER_AGGREGATABLE_SUM(
InMemoryIndexStateIsExhaustive,
EMetricType::MT_ABSOLUTE);

REGISTER_AGGREGATABLE_SUM(FreshBytesCount, EMetricType::MT_ABSOLUTE);
REGISTER_AGGREGATABLE_SUM(DeletedFreshBytesCount, EMetricType::MT_ABSOLUTE);
Expand Down Expand Up @@ -405,7 +417,8 @@ void TIndexTabletActor::TMetrics::Update(
const TReadAheadCacheStats& readAheadStats,
const TNodeIndexCacheStats& nodeIndexCacheStats,
const TNodeToSessionCounters& nodeToSessionCounters,
const TMiscNodeStats& miscNodeStats)
const TMiscNodeStats& miscNodeStats,
const TInMemoryIndexStateStats& inMemoryIndexStateStats)
{
const ui32 blockSize = fileSystem.GetBlockSize();

Expand Down Expand Up @@ -473,6 +486,11 @@ void TIndexTabletActor::TMetrics::Update(
Store(ReadAheadCacheNodeCount, readAheadStats.NodeCount);
Store(NodeIndexCacheNodeCount, nodeIndexCacheStats.NodeCount);

// Copy the in-memory index state stats supplied by the caller into the
// corresponding counters. IsNodeRefsExhaustive is a bool, so the
// InMemoryIndexStateIsExhaustive counter receives 0 or 1.
Store(InMemoryIndexStateNodesCount, inMemoryIndexStateStats.NodesCount);
Store(InMemoryIndexStateNodeRefsCount, inMemoryIndexStateStats.NodeRefsCount);
Store(InMemoryIndexStateNodeAttrsCount, inMemoryIndexStateStats.NodeAttrsCount);
Store(InMemoryIndexStateIsExhaustive, inMemoryIndexStateStats.IsNodeRefsExhaustive);

Store(
NodesOpenForWritingBySingleSession,
nodeToSessionCounters.NodesOpenForWritingBySingleSession);
Expand Down Expand Up @@ -537,7 +555,8 @@ void TIndexTabletActor::RegisterStatCounters()
CalculateReadAheadCacheStats(),
CalculateNodeIndexCacheStats(),
GetNodeToSessionCounters(),
GetMiscNodeStats());
GetMiscNodeStats(),
GetInMemoryIndexStateStats());

Metrics.Register(fsId, storageMediaKind);
}
Expand Down Expand Up @@ -584,7 +603,8 @@ void TIndexTabletActor::HandleUpdateCounters(
CalculateReadAheadCacheStats(),
CalculateNodeIndexCacheStats(),
GetNodeToSessionCounters(),
GetMiscNodeStats());
GetMiscNodeStats(),
GetInMemoryIndexStateStats());
SendMetricsToExecutor(ctx);

UpdateCountersScheduled = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@ void TIndexTabletActor::HandleHttpInfo_Default(
}
} else {
DIV_CLASS("alert") {
out << "Write allowed: " << message;
out << "Write allowed";
}
}
if (BackpressurePeriodStart || BackpressureErrorCount) {
Expand Down
1 change: 1 addition & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -1319,6 +1319,7 @@ FILESTORE_DUPCACHE_REQUESTS(FILESTORE_DECLARE_DUPCACHE)
void UpdateInMemoryIndexState(
TVector<TInMemoryIndexState::TIndexStateRequest> nodeUpdates);
void MarkNodeRefsLoadComplete();
TInMemoryIndexStateStats GetInMemoryIndexStateStats() const;
};

} // namespace NCloud::NFileStore::NStorage
10 changes: 10 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ void TInMemoryIndexState::MarkNodeRefsLoadComplete()
IsNodeRefsExhaustive = !IsNodeRefsEvictionObserved;
}

// Builds a snapshot of the cache: the current number of entries in Nodes,
// NodeRefs and NodeAttrs plus the value of the IsNodeRefsExhaustive flag.
TInMemoryIndexStateStats TInMemoryIndexState::GetStats() const
{
    TInMemoryIndexStateStats snapshot{};
    snapshot.NodesCount = Nodes.size();
    snapshot.NodeRefsCount = NodeRefs.size();
    snapshot.NodeAttrsCount = NodeAttrs.size();
    snapshot.IsNodeRefsExhaustive = IsNodeRefsExhaustive;
    return snapshot;
}

//
// Nodes
//
Expand Down
12 changes: 12 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ namespace NCloud::NFileStore::NStorage {

////////////////////////////////////////////////////////////////////////////////

// Snapshot of the in-memory index state: entry counts of its containers and
// the node refs exhaustiveness flag. Members carry default initializers so a
// default-constructed instance is well-defined (all zero / false); the struct
// remains an aggregate, so designated initialization keeps working.
struct TInMemoryIndexStateStats
{
    // Number of entries currently held for nodes.
    ui64 NodesCount = 0;
    // Number of entries currently held for node refs.
    ui64 NodeRefsCount = 0;
    // Number of entries currently held for node attrs.
    ui64 NodeAttrsCount = 0;
    // Mirrors TInMemoryIndexState::IsNodeRefsExhaustive.
    bool IsNodeRefsExhaustive = false;
};

////////////////////////////////////////////////////////////////////////////////

/**
* @brief Stores the state of the index tables in memory. Can be used to perform
* read-only operations.
Expand All @@ -28,6 +38,8 @@ class TInMemoryIndexState : public IIndexTabletDatabase

void MarkNodeRefsLoadComplete();

[[nodiscard]] TInMemoryIndexStateStats GetStats() const;

//
// Nodes
//
Expand Down
5 changes: 5 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_state_nodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,4 +619,9 @@ void TIndexTabletState::MarkNodeRefsLoadComplete()
Impl->InMemoryIndexState.MarkNodeRefsLoadComplete();
}

// Forwards to the in-memory index state owned by Impl and returns its stats
// snapshot unchanged.
TInMemoryIndexStateStats TIndexTabletState::GetInMemoryIndexStateStats() const
{
    const auto& indexState = Impl->InMemoryIndexState;
    return indexState.GetStats();
}

} // namespace NCloud::NFileStore::NStorage
39 changes: 36 additions & 3 deletions cloud/filestore/libs/storage/tablet/tablet_ut_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ namespace {

// Values of the in-memory index state sensors collected by GetTxStats().
// Every member is explicitly initialized: the fields are filled in by
// per-sensor callbacks, so a sensor whose callback is never invoked must read
// as zero/false rather than as an indeterminate value when the tests compute
// before/after differences.
struct TTxStats
{
    i64 ROCacheHitCount = 0;
    i64 ROCacheMissCount = 0;
    i64 RWCount = 0;
    // Filled from the InMemoryIndexStateNodesCount sensor.
    i64 NodesCount = 0;
    // Filled from the InMemoryIndexStateNodeRefsCount sensor.
    i64 NodeRefsCount = 0;
    // Filled from the InMemoryIndexStateNodeAttrsCount sensor.
    i64 NodeAttrsCount = 0;
    // Filled from the InMemoryIndexStateIsExhaustive sensor (non-zero => true).
    bool IsExhaustive = false;
};

TTxStats GetTxStats(TTestEnv& env, TIndexTabletClient& tablet)
Expand Down Expand Up @@ -53,6 +57,30 @@ TTxStats GetTxStats(TTestEnv& env, TIndexTabletClient& tablet)
stats.RWCount = value;
return true;
}},
{{{"filesystem", "test"}, {"sensor", "InMemoryIndexStateNodesCount"}},
[&stats](i64 value)
{
stats.NodesCount = value;
return true;
}},
{{{"filesystem", "test"}, {"sensor", "InMemoryIndexStateNodeRefsCount"}},
[&stats](i64 value)
{
stats.NodeRefsCount = value;
return true;
}},
{{{"filesystem", "test"}, {"sensor", "InMemoryIndexStateNodeAttrsCount"}},
[&stats](i64 value)
{
stats.NodeAttrsCount = value;
return true;
}},
{{{"filesystem", "test"}, {"sensor", "InMemoryIndexStateIsExhaustive"}},
[&stats](i64 value)
{
stats.IsExhaustive = value;
return true;
}},
});
return stats;
}
Expand Down Expand Up @@ -122,6 +150,8 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache)
2,
statsAfter.ROCacheMissCount - statsBefore.ROCacheMissCount);
UNIT_ASSERT_VALUES_EQUAL(2, statsAfter.RWCount - statsBefore.RWCount);
UNIT_ASSERT_VALUES_EQUAL(1, statsAfter.NodeRefsCount - statsBefore.NodeRefsCount);
UNIT_ASSERT_VALUES_EQUAL(1, statsAfter.NodesCount - statsBefore.NodesCount);
}

// Note: this test does not check the cache eviction policy, as cache size
Expand Down Expand Up @@ -388,6 +418,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache)
1,
statsAfter.ROCacheMissCount - statsBefore.ROCacheMissCount);
UNIT_ASSERT_VALUES_EQUAL(5, statsAfter.RWCount - statsBefore.RWCount);
UNIT_ASSERT_VALUES_EQUAL(2, statsAfter.NodeAttrsCount - statsBefore.NodeAttrsCount);
}

Y_UNIT_TEST(ShouldUpdateCacheUponRemoveNodeXAttr)
Expand Down Expand Up @@ -910,6 +941,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache)
UNIT_ASSERT_VALUES_EQUAL(
0,
statsAfter.ROCacheMissCount - statsBefore.ROCacheMissCount);
UNIT_ASSERT(statsAfter.IsExhaustive);

// Now let us ensure that the cache is evicted
for (int i = 0; i < 100; ++i) {
Expand All @@ -928,6 +960,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache)
UNIT_ASSERT_VALUES_EQUAL(
1,
statsAfter.ROCacheMissCount - statsBefore.ROCacheMissCount);
UNIT_ASSERT(!statsAfter.IsExhaustive);
}
}

Expand Down

0 comments on commit f0e639f

Please sign in to comment.