From 3a96f4c9c66d8305e83158181870c08c0452b1e2 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 28 Oct 2024 14:19:14 +0000 Subject: [PATCH] Also test index reads with buffered (syscall) reads * Parameterize test on `search.io` modes {`MMAP`, `DIRECTIO`, `NORMAL`} * Reduce client cardinalities down to {16, 32, 64}. * Don't print network usage stats (only really care about system+disks) --- lib/performance/stat.rb | 77 ++++++++++--------- .../mmap_vs_directio/mmap_vs_directio.rb | 57 +++++++------- 2 files changed, 69 insertions(+), 65 deletions(-) diff --git a/lib/performance/stat.rb b/lib/performance/stat.rb index 9b2713fac..20221076d 100644 --- a/lib/performance/stat.rb +++ b/lib/performance/stat.rb @@ -327,45 +327,50 @@ def printable_result(params={}) rb = TextReport::Builder.new(title, :duration => @duration, :n_ops => params[:n_ops]) + filter = params[:filter] ? params[:filter] : [:sys, :net, :disk] + + if filter.include? :sys + rb.open_group('System') + rb.single_metric('CPU utilization', m[:cpu_util] * 100.0, :suffix => '%') + rb.avg_metric('Number of forks done', m[:fork]) + rb.avg_metric('Pages swapped out', m[:swap][:swapped_out], :warn_if_exceeding => 0) + rb.avg_metric('Pages swapped in', m[:swap][:swapped_in], :warn_if_exceeding => 0) + rb.close_group + end - rb.open_group('System') - rb.single_metric('CPU utilization', m[:cpu_util] * 100.0, :suffix => '%') - rb.avg_metric('Number of forks done', m[:fork]) - rb.avg_metric('Pages swapped out', m[:swap][:swapped_out], :warn_if_exceeding => 0) - rb.avg_metric('Pages swapped in', m[:swap][:swapped_in], :warn_if_exceeding => 0) - rb.close_group - - rb.open_group('Network') - rb.open_group('IP') - rb.avg_metric('Packets sent', m[:network][:ip][:out_requests]) - rb.avg_metric('Packets received', m[:network][:ip][:in_receives]) - rb.close_group - rb.open_group('UDP') - rb.avg_metric('Datagrams sent', m[:network][:udp][:out_datagrams]) - rb.avg_metric('Datagrams received', m[:network][:udp][:in_datagrams]) - rb.avg_metric('Datagram receive errors', m[:network][:udp][:in_errors], :warn_if_exceeding => 0) - rb.close_group - rb.open_group('TCP') - rb.avg_metric('Connections established', m[:network][:tcp][:conn_est]) - rb.avg_metric('Connections dropped', m[:network][:tcp][:conn_drop]) - rb.avg_metric('Connections timed out', m[:network][:tcp][:conn_timeout], :warn_if_exceeding => 0) - rb.avg_metric('Segments sent', m[:network][:tcp][:out_segs]) - rb.avg_metric('Segments received', m[:network][:tcp][:in_segs]) - rb.avg_metric('Segments retransmitted', m[:network][:tcp][:retrans_segs]) - rb.avg_metric('Listen overflows', m[:network][:tcp][:listen_overflow], :warn_if_exceeding => 0) - rb.close_group - m[:network][:if].each do |ni, ni_m| - rb.open_group("Interface '#{ni}'") - rb.avg_metric('Packets sent', ni_m[:out_packets]) - rb.avg_metric('KiB sent', ni_m[:out_bytes] / 1024.0, :unit => 'KiB') - rb.single_metric('Avg sent packet size', ni_m[:out_bytes] / ni_m[:out_packets].to_f / 1024, :suffix => ' KiB') - rb.avg_metric('Packets received', ni_m[:in_packets]) - rb.avg_metric('KiB received', ni_m[:in_bytes] / 1024, :unit => 'KiB') - rb.single_metric('Avg received packet size', ni_m[:in_bytes] / ni_m[:in_packets].to_f / 1024, :suffix => ' KiB') + if filter.include? :net + rb.open_group('Network') + rb.open_group('IP') + rb.avg_metric('Packets sent', m[:network][:ip][:out_requests]) + rb.avg_metric('Packets received', m[:network][:ip][:in_receives]) + rb.close_group + rb.open_group('UDP') + rb.avg_metric('Datagrams sent', m[:network][:udp][:out_datagrams]) + rb.avg_metric('Datagrams received', m[:network][:udp][:in_datagrams]) + rb.avg_metric('Datagram receive errors', m[:network][:udp][:in_errors], :warn_if_exceeding => 0) + rb.close_group + rb.open_group('TCP') + rb.avg_metric('Connections established', m[:network][:tcp][:conn_est]) + rb.avg_metric('Connections dropped', m[:network][:tcp][:conn_drop]) + rb.avg_metric('Connections timed out', m[:network][:tcp][:conn_timeout], :warn_if_exceeding => 0) + rb.avg_metric('Segments sent', m[:network][:tcp][:out_segs]) + rb.avg_metric('Segments received', m[:network][:tcp][:in_segs]) + rb.avg_metric('Segments retransmitted', m[:network][:tcp][:retrans_segs]) + rb.avg_metric('Listen overflows', m[:network][:tcp][:listen_overflow], :warn_if_exceeding => 0) + rb.close_group + m[:network][:if].each do |ni, ni_m| + rb.open_group("Interface '#{ni}'") + rb.avg_metric('Packets sent', ni_m[:out_packets]) + rb.avg_metric('KiB sent', ni_m[:out_bytes] / 1024.0, :unit => 'KiB') + rb.single_metric('Avg sent packet size', ni_m[:out_bytes] / ni_m[:out_packets].to_f / 1024, :suffix => ' KiB') + rb.avg_metric('Packets received', ni_m[:in_packets]) + rb.avg_metric('KiB received', ni_m[:in_bytes] / 1024, :unit => 'KiB') + rb.single_metric('Avg received packet size', ni_m[:in_bytes] / ni_m[:in_packets].to_f / 1024, :suffix => ' KiB') + rb.close_group + end rb.close_group end - rb.close_group - if m[:disk] # not present on VMs + if filter.include? :disk and m[:disk] # not present on VMs rb.open_group('Disks') bytes_per_sector = 512 m[:disk].each do |dev, s| diff --git a/tests/performance/mmap_vs_directio/mmap_vs_directio.rb b/tests/performance/mmap_vs_directio/mmap_vs_directio.rb index 270acec5b..93d685bf3 100644 --- a/tests/performance/mmap_vs_directio/mmap_vs_directio.rb +++ b/tests/performance/mmap_vs_directio/mmap_vs_directio.rb @@ -22,13 +22,14 @@ def teardown def test_wikipedia_corpus_search_performance set_description('Test search performance on English Wikipedia corpus and query set '+ 'when file reading is done via either mmap or Direct IO') - deploy_app(make_app(search_direct_io: false)) + deploy_app(make_app(search_io_mode: 'MMAP')) @search_node = vespa.search['search'].first @container = vespa.container.values.first start - query_file_name = 'squad2-questions.fbench.141k.txt' - no_stop_words_query_file_name = 'squad2-questions.max-df-20.fbench.141k.txt' + @query_file_name = 'squad2-questions.fbench.141k.txt' + @no_stop_words_query_file_name = 'squad2-questions.max-df-20.fbench.141k.txt' + report_io_stat_deltas do feed_file('enwiki-20240801-pages.1M.jsonl.zst') end @@ -40,41 +41,39 @@ def test_wikipedia_corpus_search_performance # Note that we don't tag as "warmup=true", as we want profiling enabled here as well. puts "Warming up mmap'ed region with 64 clients" report_io_stat_deltas do - benchmark_queries(query_file_name, 'mmap_warmup', 64, false) + benchmark_queries(@query_file_name, 'mmap_warmup', 64, false) end - puts "Searching with mmap-backed search stores" - [8, 16, 32, 64].each do |clients| - report_io_stat_deltas do - benchmark_queries(query_file_name, 'mmap', clients, false) - end - report_io_stat_deltas do - benchmark_queries(no_stop_words_query_file_name, 'mmap_no_stop_words', clients, false) - end + ['MMAP', 'DIRECTIO', 'NORMAL'].each do |io_mode| + deploy_and_run_queries(search_io_mode: io_mode) end - vespa.stop_content_node('search', 0) - - puts "Redeploying with Direct IO for searches" - deploy_app(make_app(search_direct_io: true)) - # Model has changed under our feet, must refresh remote objects. - @search_node = vespa.search['search'].first - @container = vespa.container.values.first + stop + end - vespa.start_content_node('search', 0) - sleep 2 # Allow for container health pings to catch up + # Feeding must already have been done (using MMAP search_io_mode) + def deploy_and_run_queries(search_io_mode:) + if search_io_mode != 'MMAP' + vespa.stop_content_node('search', 0) + puts "Redeploying app with `search.io` mode '#{search_io_mode}'" + deploy_app(make_app(search_io_mode: search_io_mode)) + @search_node = vespa.search['search'].first + @container = vespa.container.values.first + vespa.start_content_node('search', 0) + sleep 2 # Allow for container health pings to catch up + end - puts "Searching with Direct IO-backed search stores" - [8, 16, 32, 64].each do |clients| + pretty_mode = search_io_mode.downcase + puts "Searching with '#{pretty_mode}' search store backing" + [16, 32, 64].each do |clients| report_io_stat_deltas do - benchmark_queries(query_file_name, 'directio', clients, false) + benchmark_queries(@query_file_name, pretty_mode, clients, false) end report_io_stat_deltas do - benchmark_queries(no_stop_words_query_file_name, 'directio_no_stop_words', clients, false) + benchmark_queries(@no_stop_words_query_file_name, "#{pretty_mode}_no_stop_words", clients, false) end end - stop end def feed_file(feed_file, n_docs = -1) @@ -94,7 +93,7 @@ def download_file(file_name, vespa_node) download_file_from_s3(file_name, vespa_node, 'wikipedia') end - def make_app(search_direct_io:) + def make_app(search_io_mode:) SearchApp.new.sd(selfdir + 'wikimedia.sd'). container(Container.new('default'). jvmoptions("-Xms16g -Xmx16g"). @@ -103,14 +102,14 @@ def make_app(search_direct_io:) documentapi(ContainerDocumentApi.new)). indexing_cluster('default'). indexing_chain('indexing'). - search_io(search_direct_io ? 'DIRECTIO' : 'MMAP') + search_io(search_io_mode) end def report_io_stat_deltas stat_before = @search_node.performance_snapshot yield stat_after = @search_node.performance_snapshot - puts Perf::Stat::snapshot_period(stat_before, stat_after).printable_result + puts Perf::Stat::snapshot_period(stat_before, stat_after).printable_result({:filter => [:sys, :disk]}) end # TODO dedupe