diff --git a/src/leveled_imanifest.erl b/src/leveled_imanifest.erl index 8532af2a..4248beb8 100644 --- a/src/leveled_imanifest.erl +++ b/src/leveled_imanifest.erl @@ -19,7 +19,8 @@ reader/2, writer/3, printer/1, - complete_filex/0 + complete_filex/0, + get_cdbpids/1 ]). -define(MANIFEST_FILEX, "man"). @@ -218,6 +219,10 @@ from_list(Manifest) -> % reads are more common than stale reads lists:foldr(fun prepend_entry/2, [], Manifest). +-spec get_cdbpids(manifest()) -> list(pid()). +%% @doc return a list of PIDs within the manifest +get_cdbpids(Manifest) -> + lists:map(fun(ME) -> element(3, ME) end, to_list(Manifest)). %%%============================================================================ %%% Internal Functions diff --git a/src/leveled_inker.erl b/src/leveled_inker.erl index 3c52ab07..6e381518 100644 --- a/src/leveled_inker.erl +++ b/src/leveled_inker.erl @@ -118,14 +118,20 @@ ink_loglevel/2, ink_addlogs/2, ink_removelogs/2, - ink_getjournalsqn/1]). + ink_getjournalsqn/1, + ink_getcdbpids/1, + ink_getclerkpid/1 + ]). -export([filepath/2, filepath/3]). -ifdef(TEST). --export([build_dummy_journal/0, clean_testdir/1]). +-export( + [build_dummy_journal/0, clean_testdir/1] +). -endif. + -define(MANIFEST_FP, "journal_manifest"). -define(FILES_FP, "journal_files"). -define(COMPACT_FP, "post_compact"). @@ -480,6 +486,19 @@ ink_removelogs(Pid, ForcedLogs) -> ink_getjournalsqn(Pid) -> gen_server:call(Pid, get_journalsqn, infinity). +-spec ink_getcdbpids(pid()) -> list(pid()). +%% @doc +%% Used for profiling in tests - get a list of CDB PIDs to profile +ink_getcdbpids(Pid) -> + gen_server:call(Pid, get_cdbpids). + +-spec ink_getclerkpid(pid()) -> pid(). +%% @doc +%% Used for profiling in tests - get the clerk PID to profile +ink_getclerkpid(Pid) -> + gen_server:call(Pid, get_clerkpid). 
+ + %%%============================================================================ %%% gen_server callbacks %%%============================================================================ @@ -673,6 +692,11 @@ handle_call({check_sqn, LedgerSQN}, _From, State) -> end; handle_call(get_journalsqn, _From, State) -> {reply, {ok, State#state.journal_sqn}, State}; +handle_call(get_cdbpids, _From, State) -> + CDBPids = leveled_imanifest:get_cdbpids(State#state.manifest), + {reply, [State#state.active_journaldb|CDBPids], State}; +handle_call(get_clerkpid, _From, State) -> + {reply, State#state.clerk, State}; handle_call(close, _From, State=#state{is_snapshot=Snap}) when Snap == true -> ok = ink_releasesnapshot(State#state.source_inker, self()), {stop, normal, ok, State}; diff --git a/src/leveled_penciller.erl b/src/leveled_penciller.erl index db2ef3bc..97d76188 100644 --- a/src/leveled_penciller.erl +++ b/src/leveled_penciller.erl @@ -201,6 +201,8 @@ sst_rootpath/1, sst_filename/3]). +-export([pcl_getsstpids/1, pcl_getclerkpid/1]). + -ifdef(TEST). -export([ clean_testdir/1]). @@ -614,6 +616,18 @@ pcl_addlogs(Pid, ForcedLogs) -> pcl_removelogs(Pid, ForcedLogs) -> gen_server:cast(Pid, {remove_logs, ForcedLogs}). +-spec pcl_getsstpids(pid()) -> list(pid()). +%% @doc +%% Used for profiling in tests - get a list of SST PIDs to profile +pcl_getsstpids(Pid) -> + gen_server:call(Pid, get_sstpids). + +-spec pcl_getclerkpid(pid()) -> pid(). +%% @doc +%% Used for profiling in tests - get the clerk PID to profile +pcl_getclerkpid(Pid) -> + gen_server:call(Pid, get_clerkpid). 
+
 %%%============================================================================
 %%% gen_server callbacks
 %%%============================================================================
@@ -965,7 +979,11 @@ handle_call(check_for_work, _From, State) ->
     {_WL, WC} = leveled_pmanifest:check_for_work(State#state.manifest),
     {reply, WC > 0, State};
 handle_call(persisted_sqn, _From, State) ->
-    {reply, State#state.persisted_sqn, State}.
+    {reply, State#state.persisted_sqn, State};
+handle_call(get_sstpids, _From, State) ->
+    {reply, leveled_pmanifest:get_sstpids(State#state.manifest), State};
+handle_call(get_clerkpid, _From, State) ->
+    {reply, State#state.clerk, State}.
 
 handle_cast({manifest_change, Manifest}, State) ->
     NewManSQN = leveled_pmanifest:get_manifest_sqn(Manifest),
diff --git a/src/leveled_pmanifest.erl b/src/leveled_pmanifest.erl
index b951930c..eb59ad0f 100644
--- a/src/leveled_pmanifest.erl
+++ b/src/leveled_pmanifest.erl
@@ -46,7 +46,8 @@
     levelzero_present/1,
     check_bloom/3,
     report_manifest_level/2,
-    snapshot_pids/1
+    snapshot_pids/1,
+    get_sstpids/1
     ]).
 
 -export([
@@ -699,6 +700,31 @@ check_bloom(Manifest, FP, Hash) ->
 snapshot_pids(Manifest) ->
     lists:map(fun(S) -> element(1, S) end, Manifest#manifest.snapshots).
 
+-spec get_sstpids(manifest()) -> list(pid()).
+%% @doc
+%% Collect the owner PID of every manifest entry, walking the levels in
+%% order from level 0 through to the basement level.
+get_sstpids(Manifest) ->
+    AllLevels = Manifest#manifest.levels,
+    %% Levels 0 and 1 are used directly as lists; any deeper level is first
+    %% converted with leveled_tree:to_list/1.
+    EntriesAt =
+        fun(LevelIdx) when LevelIdx > 1 ->
+                leveled_tree:to_list(array:get(LevelIdx, AllLevels));
+            (LevelIdx) ->
+                array:get(LevelIdx, AllLevels)
+        end,
+    OwnerOf =
+        fun(MaybeEntry) ->
+            Entry = get_manifest_entry(MaybeEntry),
+            Entry#manifest_entry.owner
+        end,
+    lists:flatmap(
+        fun(LevelIdx) -> lists:map(OwnerOf, EntriesAt(LevelIdx)) end,
+        lists:seq(0, Manifest#manifest.basement)
+    ).
+ %%%============================================================================ %%% Internal Functions %%%============================================================================ diff --git a/test/end_to_end/basic_SUITE.erl b/test/end_to_end/basic_SUITE.erl index e5ed2e23..56d5657e 100644 --- a/test/end_to_end/basic_SUITE.erl +++ b/test/end_to_end/basic_SUITE.erl @@ -14,7 +14,8 @@ bigjournal_littlejournal/1, bigsst_littlesst/1, safereaderror_startup/1, - remove_journal_test/1 + remove_journal_test/1, + bigpcl_bucketlist/1 ]). all() -> [ @@ -30,7 +31,8 @@ all() -> [ bigjournal_littlejournal, bigsst_littlesst, safereaderror_startup, - remove_journal_test + remove_journal_test, + bigpcl_bucketlist ]. @@ -1197,4 +1199,78 @@ safereaderror_startup(_Config) -> {ok, ReadBack} = leveled_bookie:book_get(Bookie2, B1, K1), io:format("Read back ~w", [ReadBack]), true = ReadBack == Obj2, - ok = leveled_bookie:book_close(Bookie2). \ No newline at end of file + ok = leveled_bookie:book_close(Bookie2). + +bigpcl_bucketlist(_Config) -> + %% https://github.com/martinsumner/leveled/issues/326 + %% In OTP 22+ there appear to be issues with anonymous functions which + %% have a reference to loop state, requiring a copy of all the loop state + %% to be made when returning the function. + %% This test creates a large loop state on the leveled_penciller to prove + %% this. + %% The problem can be resolved simply by renaming the element of the loop + %% state used within the anonymous function. 
+ RootPath = testutil:reset_filestructure(), + BucketCount = 500, + ObjectCount = 100, + StartOpts1 = [{root_path, RootPath}, + {max_journalsize, 50000000}, + {cache_size, 4000}, + {max_pencillercachesize, 128000}, + {max_sstslots, 256}, + {sync_strategy, testutil:sync_strategy()}], + {ok, Bookie1} = leveled_bookie:book_start(StartOpts1), + BucketList = + lists:map(fun(I) -> list_to_binary(integer_to_list(I)) end, + lists:seq(1, BucketCount)), + + MapFun = + fun(B) -> + testutil:generate_objects(ObjectCount, 1, [], + leveled_rand:rand_bytes(100), + fun() -> [] end, + B) + end, + ObjLofL = lists:map(MapFun, BucketList), + lists:foreach(fun(ObjL) -> testutil:riakload(Bookie1, ObjL) end, ObjLofL), + BucketFold = + fun(B, _K, _V, Acc) -> + case sets:is_element(B, Acc) of + true -> + Acc; + false -> + sets:add_element(B, Acc) + end + end, + FBAccT = {BucketFold, sets:new()}, + + {async, BucketFolder1} = + leveled_bookie:book_headfold(Bookie1, + ?RIAK_TAG, + {bucket_list, BucketList}, + FBAccT, + false, false, false), + + {FoldTime1, BucketList1} = timer:tc(BucketFolder1, []), + true = BucketCount == sets:size(BucketList1), + ok = leveled_bookie:book_close(Bookie1), + + {ok, Bookie2} = leveled_bookie:book_start(StartOpts1), + + {async, BucketFolder2} = + leveled_bookie:book_headfold(Bookie2, + ?RIAK_TAG, + {bucket_list, BucketList}, + FBAccT, + false, false, false), + {FoldTime2, BucketList2} = timer:tc(BucketFolder2, []), + true = BucketCount == sets:size(BucketList2), + + io:format("Fold pre-close ~w ms post-close ~w ms~n", + [FoldTime1 div 1000, FoldTime2 div 1000]), + + true = FoldTime1 < 10 * FoldTime2, + %% The fold in-memory should be the same order of magnitude of response + %% time as the fold post-persistence + + ok = leveled_bookie:book_destroy(Bookie2). 
\ No newline at end of file diff --git a/test/end_to_end/perf_SUITE.erl b/test/end_to_end/perf_SUITE.erl index 9ac6f393..1394f0a1 100644 --- a/test/end_to_end/perf_SUITE.erl +++ b/test/end_to_end/perf_SUITE.erl @@ -1,85 +1,529 @@ -module(perf_SUITE). --include_lib("common_test/include/ct.hrl"). --include("include/leveled.hrl"). --export([all/0]). --export([bigpcl_bucketlist/1 - ]). - -all() -> [bigpcl_bucketlist]. +-include("../include/leveled.hrl"). +-define(INFO, info). +-export([all/0, suite/0]). +-export([ + riak_ctperf/1, riak_fullperf/1, riak_profileperf/1 +]). +all() -> [riak_ctperf]. +suite() -> [{timetrap, {hours, 16}}]. -bigpcl_bucketlist(_Config) -> - %% https://github.com/martinsumner/leveled/issues/326 - %% In OTP 22+ there appear to be issues with anonymous functions which - %% have a reference to loop state, requiring a copy of all the loop state - %% to be made when returning the function. - %% This test creates alarge loop state on the leveled_penciller to prove - %% this. - %% The problem can be resolved simply by renaming the element of the loop - %% state using within the anonymous function. 
- RootPath = testutil:reset_filestructure(), - BucketCount = 500, - ObjectCount = 100, - StartOpts1 = [{root_path, RootPath}, - {max_journalsize, 50000000}, - {cache_size, 4000}, - {max_pencillercachesize, 128000}, - {max_sstslots, 256}, - {sync_strategy, testutil:sync_strategy()}, - {compression_point, on_compact}], + +% For full performance test +riak_fullperf(_Config) -> + R2A = riak_load_tester(<<"B0">>, 2000000, 2048, [], native), + output_result(R2A), + R2B = riak_load_tester(<<"B0">>, 2000000, 2048, [], native), + output_result(R2B), + R2C = riak_load_tester(<<"B0">>, 2000000, 2048, [], native), + output_result(R2C), + R5A = riak_load_tester(<<"B0">>, 5000000, 2048, [], native), + output_result(R5A), + R5B = riak_load_tester(<<"B0">>, 5000000, 2048, [], native), + output_result(R5B), + R10 = riak_load_tester(<<"B0">>, 10000000, 2048, [], native), + output_result(R10) + . + +riak_profileperf(_Config) -> + riak_load_tester( + <<"B0">>, + 2000000, + 2048, + [load, head, get, query, mini_query, full, guess, estimate, update], + native). + +% For standard ct test runs +riak_ctperf(_Config) -> + riak_load_tester(<<"B0">>, 400000, 1024, [], native). 
+
+%% Core load test shared by the test cases in this suite.
+%%
+%% Starts a bookie under a reset "riakLoad" directory and then, in order:
+%% loads KeyCount objects as ten equal chunks (in a fixed, non-sequential
+%% chunk order), times random HEAD and GET fetches, times index queries of
+%% two range sizes, runs the size-estimate folds, times a rotation of
+%% updates, logs disk and memory use, and finally runs a profile for each
+%% item in ProfileList before destroying the store.
+%%
+%% Returns the tuple of timings (ms), disk space, averaged memory figures
+%% and the closing SST/CDB PID lists that output_result/1 matches on.
+riak_load_tester(Bucket, KeyCount, ObjSize, ProfileList, PressMethod) ->
+    ct:log(
+        ?INFO,
+        "Basic riak test with KeyCount ~w ObjSize ~w",
+        [KeyCount, ObjSize]
+    ),
+    IndexCount = 100000,
+
+    GetFetches = KeyCount div 4,
+    HeadFetches = KeyCount div 2,
+    IndexesReturned = KeyCount * 2,
+
+    RootPath = testutil:reset_filestructure("riakLoad"),
+    StartOpts1 =
+        [{root_path, RootPath},
+            {sync_strategy, testutil:sync_strategy()},
+            {log_level, warn},
+            {compression_method, PressMethod},
+            {forced_logs,
+                [b0015, b0016, b0017, b0018, p0032, sst12]}
+        ],
+
     {ok, Bookie1} = leveled_bookie:book_start(StartOpts1),
-    BucketList =
-        lists:map(fun(I) -> list_to_binary(integer_to_list(I)) end,
-                    lists:seq(1, BucketCount)),
-
-    MapFun =
-        fun(B) ->
-            testutil:generate_objects(ObjectCount, 1, [],
-                                        leveled_rand:rand_bytes(100),
-                                        fun() -> [] end,
-                                        B)
-        end,
-    ObjLofL = lists:map(MapFun, BucketList),
-    lists:foreach(fun(ObjL) -> testutil:riakload(Bookie1, ObjL) end, ObjLofL),
-    BucketFold =
-        fun(B, _K, _V, Acc) ->
-            case sets:is_element(B, Acc) of
-                true ->
-                    Acc;
-                false ->
-                    sets:add_element(B, Acc)
+
+    %% Each chunk gets its own pair of index names; every object is given
+    %% two integer terms and two binary terms (32-bit encoded integers).
+    IndexGenFun =
+        fun(ListID) ->
+            fun() ->
+                RandInt = leveled_rand:uniform(IndexCount - 1),
+                IntIndex = "integer" ++ integer_to_list(ListID) ++ "_int",
+                BinIndex = "binary" ++ integer_to_list(ListID) ++ "_bin",
+                [{add, list_to_binary(IntIndex), RandInt},
+                {add, list_to_binary(IntIndex), RandInt + 1},
+                {add, list_to_binary(BinIndex), <<RandInt:32/integer>>},
+                {add, list_to_binary(BinIndex), <<(RandInt + 1):32/integer>>}]
             end
         end,
-    FBAccT = {BucketFold, sets:new()},
 
-    {async, BucketFolder1} =
-        leveled_bookie:book_headfold(Bookie1,
-                                        ?RIAK_TAG,
-                                        {bucket_list, BucketList},
-                                        FBAccT,
-                                        false, false, false),
+    CountPerList = KeyCount div 10,
 
-    {FoldTime1, BucketList1} = timer:tc(BucketFolder1, []),
-    true = BucketCount == sets:size(BucketList1),
-    ok = leveled_bookie:book_close(Bookie1),
+    TC4 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 4),
+    TC1 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 1),
+    TC9 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 9),
+    TC8 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 8),
+    TC5 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 5),
+    TC2 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 2),
+    TC6 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 6),
+    TC3 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 3),
+    TC7 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 7),
+    TC10 = load_chunk(Bookie1, CountPerList, ObjSize, IndexGenFun, Bucket, 10),
 
-    {ok, Bookie2} = leveled_bookie:book_start(StartOpts1),
+    ct:log(
+        ?INFO,
+        "Load time per group ~w ~w ~w ~w ~w ~w ~w ~w ~w ~w ms",
+        lists:map(
+            fun(T) -> T div 1000 end,
+            [TC4, TC1, TC9, TC8, TC5, TC2, TC6, TC3, TC7, TC10])
+    ),
+    TotalLoadTime =
+        (TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + TC7 + TC8 + TC9 + TC10) div 1000,
+    ct:log(?INFO, "Total load time ~w ms", [TotalLoadTime]),
+
+    {MT0, MP0, MB0} = memory_usage(),
+
+    TotalHeadTime =
+        random_fetches(head, Bookie1, Bucket, KeyCount, HeadFetches),
 
-    {async, BucketFolder2} =
-        leveled_bookie:book_headfold(Bookie2,
-                                        ?RIAK_TAG,
-                                        {bucket_list, BucketList},
-                                        FBAccT,
-                                        false, false, false),
-    {FoldTime2, BucketList2} = timer:tc(BucketFolder2, []),
-    true = BucketCount == sets:size(BucketList2),
+    {MT1, MP1, MB1} = memory_usage(),
+
+    TotalGetTime =
+        random_fetches(get, Bookie1, Bucket, KeyCount, GetFetches),
 
-    io:format("Fold pre-close ~w ms post-close ~w ms~n",
-                [FoldTime1 div 1000, FoldTime2 div 1000]),
+    {MT2, MP2, MB2} = memory_usage(),
+
+    QuerySize = max(10, IndexCount div 1000),
+    MiniQuerySize = max(1, IndexCount div 50000),
+    TotalQueryTime =
+        random_queries(
+            Bookie1,
+            Bucket,
+            10,
+            IndexCount,
+            QuerySize,
+            IndexesReturned),
+    TotalMiniQueryTime =
+        random_queries(
+            Bookie1,
+            Bucket,
+            10,
+            IndexCount,
+            MiniQuerySize,
+            IndexesReturned div 8),
+
+    {MT3, MP3, MB3} = memory_usage(),
+
+    {FullFoldTime, SegFoldTime} = size_estimate_summary(Bookie1),
+
+    {MT4, MP4, MB4} = memory_usage(),
+
+    TotalUpdateTime =
+        rotate_chunk(Bookie1, <<"UpdBucket">>, KeyCount div 50, ObjSize),
+
+    {MT5, MP5, MB5} = memory_usage(),
+
+    DiskSpace = lists:nth(1, string:tokens(os:cmd("du -sh riakLoad"), "\t")),
+    ct:log(?INFO, "Disk space taken by test ~s", [DiskSpace]),
+
+    MemoryUsage = erlang:memory(),
+    ct:log(?INFO, "Memory in use at end of test ~p", [MemoryUsage]),
+
+    ProfileData =
+        {Bookie1, Bucket, KeyCount, ObjSize, IndexCount, IndexesReturned},
+    lists:foreach(
+        fun(P) ->
+            ct:log(?INFO, "Profile of ~w", [P]),
+            %% Attach the per-profile argument where one is needed; any
+            %% other atom is handed to profile_fun/2 unchanged.
+            P0 =
+                case P of
+                    mini_query ->
+                        {mini_query, MiniQuerySize};
+                    query ->
+                        {query, QuerySize};
+                    head ->
+                        {head, HeadFetches};
+                    get ->
+                        {get, GetFetches};
+                    load ->
+                        {load, IndexGenFun};
+                    P ->
+                        P
+                end,
+            ProFun = profile_fun(P0, ProfileData),
+            profile_test(Bookie1, ProFun)
+        end,
+        ProfileList),
+
+    {_Inker, _Pcl, SSTPids, _PClerk, CDBPids, _IClerk} = get_pids(Bookie1),
+    leveled_bookie:book_destroy(Bookie1),
+
+    %% Memory figures are the mean of the six samples taken above, scaled
+    %% down by 1000000 (output_result/1 labels them as MB).
+    {KeyCount, ObjSize, PressMethod,
+        TotalLoadTime,
+        TotalHeadTime, TotalGetTime,
+        TotalQueryTime, TotalMiniQueryTime, FullFoldTime, SegFoldTime,
+        TotalUpdateTime,
+        DiskSpace,
+        {(MT0 + MT1 + MT2 + MT3 + MT4 + MT5) div 6000000,
+            (MP0 + MP1 + MP2 + MP3 + MP4 + MP5) div 6000000,
+            (MB0 + MB1 + MB2 + MB3 + MB4 + MB5) div 6000000},
+        SSTPids, CDBPids}.
+
+
+%% Profile ProfileFun across the test process, the bookie, inker,
+%% penciller, both clerks and every SST and CDB process reported by
+%% get_pids/1.
+profile_test(Bookie, ProfileFun) ->
+    {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk} = get_pids(Bookie),
+    TestPid = self(),
+    profile_app(
+        [TestPid, Bookie, Inker, IClerk, Pcl, PClerk] ++ SSTPids ++ CDBPids,
+        ProfileFun).
+
+%% Look up the processes behind a bookie: the inker and penciller, each
+%% one's clerk, and the SST and CDB file process lists.
+get_pids(Bookie) ->
+    {ok, Inker, Pcl} = leveled_bookie:book_returnactors(Bookie),
+    SSTPids = leveled_penciller:pcl_getsstpids(Pcl),
+    PClerk = leveled_penciller:pcl_getclerkpid(Pcl),
+    CDBPids = leveled_inker:ink_getcdbpids(Inker),
+    IClerk = leveled_inker:ink_getclerkpid(Inker),
+    {Inker, Pcl, SSTPids, PClerk, CDBPids, IClerk}.
+
+%% Print the result tuple returned by riak_load_tester/5 to the `user`
+%% device as a labelled summary. The SST/CDB PID lists are reported as
+%% counts only.
+output_result(
+    {KeyCount, ObjSize, PressMethod,
+        TotalLoadTime,
+        TotalHeadTime, TotalGetTime,
+        TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime,
+        TotalUpdateTime,
+        DiskSpace,
+        {TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB},
+        SSTPids, CDBPids}
+) ->
+    %% TODO ct:pal not working? even with rebar3 ct --verbose?
+    io:format(
+        user,
+        "~n"
+        "Outputs from profiling with KeyCount ~w ObjSize ~w Compression ~w:~n"
+        "TotalLoadTime - ~w ms~n"
+        "TotalHeadTime - ~w ms~n"
+        "TotalGetTime - ~w ms~n"
+        "TotalQueryTime - ~w ms~n"
+        "TotalMiniQueryTime - ~w ms~n"
+        "TotalFullFoldTime - ~w ms~n"
+        "TotalAAEFoldTime - ~w ms~n"
+        "TotalUpdateTime - ~w ms~n"
+        "Disk space required for test - ~s~n"
+        "Average Memory usage for test - Total ~p Proc ~p Bin ~p MB~n"
+        "Closing count of SST Files - ~w~n"
+        "Closing count of CDB Files - ~w~n",
+        [KeyCount, ObjSize, PressMethod,
+            TotalLoadTime, TotalHeadTime, TotalGetTime,
+            TotalQueryTime, TotalMiniQueryTime, TotalFullFoldTime, TotalSegFoldTime,
+            TotalUpdateTime,
+            DiskSpace,
+            TotalMemoryMB, ProcessMemoryMB, BinaryMemoryMB,
+            length(SSTPids), length(CDBPids)]
+    ).
+
+%% Sample VM memory after garbage-collecting the calling process.
+%% Returns {Total, Processes, Binary} as reported by erlang:memory/0.
+memory_usage() ->
+    garbage_collect(), % GC the test process
+    MemoryUsage = erlang:memory(),
+    {element(2, lists:keyfind(total, 1, MemoryUsage)),
+        element(2, lists:keyfind(processes, 1, MemoryUsage)),
+        element(2, lists:keyfind(binary, 1, MemoryUsage))}.
+
+%% Run ProfiledFun under eprof for the given Pids and print the total
+%% analysis. eprof is stopped in the `after` section so that a crash inside
+%% ProfiledFun does not leave the profiler running for later profiles.
+profile_app(Pids, ProfiledFun) ->
+
+    eprof:start(),
+    try
+        eprof:start_profiling(Pids),
+
+        ProfiledFun(),
+
+        eprof:stop_profiling(),
+        eprof:analyze(total),
+        %% Keep the previous success-path result (that of eprof:stop/0)
+        stopped
+    after
+        eprof:stop()
+    end.
+
+%% Run size_estimate_tester/1 Loops times, logging the mean time of each
+%% fold type and the mean absolute error of the estimate and the guess.
+%% Returns {FullFoldTime, SegFoldTime} in ms, each summed over all loops:
+%% the full count folds, and the guess plus estimate folds respectively.
+size_estimate_summary(Bookie) ->
+    Loops = 10,
+    ct:log(
+        ?INFO,
+        "Size Estimate Tester (SET) started with Loops ~w",
+        [Loops]
+    ),
+    {{TotalGuessTime, TotalEstimateTime, TotalCountTime},
+            {TotalEstimateVariance, TotalGuessVariance}} =
+        lists:foldl(
+            fun(_I, {{GT, ET, CT}, {AET, AGT}}) ->
+                {{GT0, ET0, CT0}, {AE0, AG0}} = size_estimate_tester(Bookie),
+                {{GT + GT0, ET + ET0, CT + CT0}, {AET + AE0, AGT + AG0}}
+            end,
+            {{0, 0, 0}, {0, 0}},
+            lists:seq(1, Loops)
+        ),
+    %% Totals are timer:tc/1 microseconds summed over every loop, so the
+    %% per-loop mean in ms divides by Loops * 1000. Deriving this from
+    %% Loops keeps the log correct if the loop count is changed.
+    MeanDivisor = Loops * 1000,
+    ct:log(
+        ?INFO,
+        "SET: MeanGuess ~w ms MeanEstimate ~w ms MeanCount ~w ms",
+        [TotalGuessTime div MeanDivisor,
+            TotalEstimateTime div MeanDivisor,
+            TotalCountTime div MeanDivisor]
+    ),
+    ct:log(
+        ?INFO,
+        "Mean variance in Estimate ~w Guess ~w",
+        [TotalEstimateVariance div Loops, TotalGuessVariance div Loops]
+    ),
+    %% Assume that segment-list folds are 10 * as common as all folds
+    {TotalCountTime div 1000, (TotalGuessTime + TotalEstimateTime) div 1000}.
+
+
+%% Time three successive writes of KeyCount indexed objects into Bucket,
+%% each pass using a different random value of ObjSize bytes (base64
+%% encoded). Value generation happens before the timer starts.
+%% Returns the elapsed time in ms.
+rotate_chunk(Bookie, Bucket, KeyCount, ObjSize) ->
+    ct:log(
+        ?INFO,
+        "Rotating an ObjList ~w - "
+        "time includes object generation",
+        [KeyCount]),
+    V1 = base64:encode(leveled_rand:rand_bytes(ObjSize)),
+    V2 = base64:encode(leveled_rand:rand_bytes(ObjSize)),
+    V3 = base64:encode(leveled_rand:rand_bytes(ObjSize)),
+    {TC, ok} =
+        timer:tc(
+            fun() ->
+                testutil:rotation_withnocheck(
+                    Bookie, Bucket, KeyCount, V1, V2, V3)
+            end),
+    TC div 1000.
+
+%% Generate chunk number Chunk and time its load into the bookie. Object
+%% generation is outside the timed section. Returns microseconds.
+load_chunk(Bookie, CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
+    ct:log(?INFO, "Generating and loading ObjList ~w", [Chunk]),
+    ObjList =
+        generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk),
+    {TC, ok} = timer:tc(fun() -> testutil:riakload(Bookie, ObjList) end),
+    garbage_collect(),
+    %% Pause between chunks - presumably to let background work settle
+    %% before the next timed load; confirm before changing.
+    timer:sleep(2000),
+    TC.
+
+%% Build CountPerList objects for chunk number Chunk. Keys are fixed
+%% binaries numbered from (Chunk - 1) * CountPerList + 1, so consecutive
+%% chunks cover consecutive key ranges; all objects in the chunk share one
+%% random base64 value.
+generate_chunk(CountPerList, ObjSize, IndexGenFun, Bucket, Chunk) ->
+    testutil:generate_objects(
+        CountPerList,
+        {fixed_binary, (Chunk - 1) * CountPerList + 1}, [],
+        base64:encode(leveled_rand:rand_bytes(ObjSize)),
+        IndexGenFun(Chunk),
+        Bucket
+    ).
+
+%% Count the objects with a full fold, then approximate the same count
+%% with two segment-limited folds (estimate and guess), asserting that each
+%% approximation lies within a tolerance that tightens as the count grows.
+%% Returns {{GuessTime, EstimateTime, FullTime}, {EstimateError, GuessError}}
+%% with times as returned by timer:tc/1 and errors as absolute differences.
+size_estimate_tester(Bookie) ->
+    {FullMicros, Actual} = counter(Bookie, full),
+    {EstimateMicros, Estimated} = counter(Bookie, estimate),
+    {GuessMicros, Guessed} = counter(Bookie, guess),
+    {GuessTol, EstimateTol} =
+        if
+            Actual < 500000 -> {0.20, 0.15};
+            Actual < 1000000 -> {0.12, 0.1};
+            Actual < 2000000 -> {0.1, 0.08};
+            true -> {0.08, 0.05}
+        end,
+    WithinTolerance =
+        fun(Approx, Tol) ->
+            Ratio = Approx / Actual,
+            (Ratio > (1.0 - Tol)) andalso (Ratio < (1.0 + Tol))
+        end,
+    true = WithinTolerance(Guessed, GuessTol),
+    true = WithinTolerance(Estimated, EstimateTol),
+    {{GuessMicros, EstimateMicros, FullMicros},
+        {abs(Estimated - Actual), abs(Guessed - Actual)}}.
+
+%% Timed head fold returning {Microseconds, Count}.
+%%   full     - every object adds 1, no segment list
+%%   guess    - 32 consecutive random segments, each hit adds 1024
+%%   estimate - 128 consecutive random segments, each hit adds 256
+counter(Bookie, full) ->
+    timed_count(Bookie, 1, false);
+counter(Bookie, guess) ->
+    timed_count(Bookie, 1024, random_segments(32));
+counter(Bookie, estimate) ->
+    timed_count(Bookie, 256, random_segments(128)).
+
+%% Pick Width consecutive segment IDs at a random offset within a tree of
+%% 1024 * 1024 segments.
+random_segments(Width) ->
+    TictacTreeSize = 1024 * 1024,
+    FirstSegment = rand:uniform(TictacTreeSize - Width) - 1,
+    lists:seq(FirstSegment, FirstSegment + Width - 1).
+
+%% Build the head fold that adds Step per object and run it under
+%% timer:tc/1.
+timed_count(Bookie, Step, SegmentList) ->
+    {async, Folder} =
+        leveled_bookie:book_headfold(
+            Bookie,
+            ?RIAK_TAG,
+            {fun(_B, _K, _V, AccC) -> AccC + Step end, 0},
+            false,
+            true,
+            SegmentList
+        ),
+    timer:tc(Folder).
+
+
+%% Time Fetches random GET or HEAD requests (FetchType is get | head)
+%% against Bucket. Four in five keys are drawn from the first fifth of the
+%% key space and the rest from the remainder. Key selection is done before
+%% the timer starts. Every fetch must return {ok, _}.
+%% Returns the elapsed time in ms.
+random_fetches(FetchType, Bookie, Bucket, ObjCount, Fetches) ->
+    KeysToFetch =
+        lists:map(
+            fun(I) ->
+                Twenty = ObjCount div 5,
+                case I rem 5 of
+                    1 ->
+                        testutil:fixed_bin_key(
+                            Twenty + leveled_rand:uniform(ObjCount - Twenty));
+                    _ ->
+                        testutil:fixed_bin_key(leveled_rand:uniform(Twenty))
+                end
+            end,
+            lists:seq(1, Fetches)
+        ),
+    {TC, ok} =
+        timer:tc(
+            fun() ->
+                lists:foreach(
+                    fun(K) ->
+                        {ok, _} =
+                            case FetchType of
+                                get ->
+                                    testutil:book_riakget(Bookie, Bucket, K);
+                                head ->
+                                    testutil:book_riakhead(Bookie, Bucket, K)
+                            end
+                    end,
+                    KeysToFetch
+                )
+            end
+        ),
+    ct:log(
+        ?INFO,
+        "Fetch of type ~w ~w keys in ~w ms",
+        [FetchType, Fetches, TC div 1000]
+    ),
+    TC div 1000.
+
+%% Time repeated range queries on a randomly chosen binary index
+%% ("binary<ID>_bin", ID in 1..IDs) until at least IndexesReturned entries
+%% have been counted. Each query covers a random range of up to MaxRange
+%% index values; the range start is drawn from a different part of the
+%% index space depending on the range size.
+%% Returns the elapsed time in ms.
+random_queries(Bookie, Bucket, IDs, IdxCnt, MaxRange, IndexesReturned) ->
+    QueryFun =
+        fun() ->
+            ID = leveled_rand:uniform(IDs),
+            BinIndex =
+                list_to_binary("binary" ++ integer_to_list(ID) ++ "_bin"),
+            Twenty = IdxCnt div 5,
+            RI = leveled_rand:uniform(MaxRange),
+            [Start, End] =
+                case RI of
+                    RI when RI < (MaxRange div 5) ->
+                        R0 = leveled_rand:uniform(IdxCnt - (Twenty + RI)),
+                        [R0 + Twenty, R0 + Twenty + RI];
+                    _ ->
+                        R0 = leveled_rand:uniform(Twenty - RI),
+                        [R0, R0 + RI]
+                end,
+            FoldKeysFun = fun(_B, _K, Cnt) -> Cnt + 1 end,
+            %% Range bounds use the same 32-bit integer encoding as the
+            %% binary index terms written by the load phase.
+            {async, R} =
+                leveled_bookie:book_indexfold(
+                    Bookie,
+                    {Bucket, <<>>},
+                    {FoldKeysFun, 0},
+                    {BinIndex, <<Start:32/integer>>, <<End:32/integer>>},
+                    {true, undefined}),
+            R()
+        end,
+
+    {TC, {QC, EF}} =
+        timer:tc(fun() -> run_queries(QueryFun, 0, 0, IndexesReturned) end),
+    ct:log(
+        ?INFO,
+        "Fetch of ~w index entries in ~w queries took ~w ms",
+        [EF, QC, TC div 1000]
+    ),
+    TC div 1000.
-    true = FoldTime1 < 10 * FoldTime2,
-    %% The fold in-memory should be the same order of magnitude of response
-    %% time as the fold post-persistence
-    ok = leveled_bookie:book_destroy(Bookie2).
+%% Call QueryFun repeatedly until the running total of entries it reports
+%% reaches TargetEntries. Returns {QueriesRun, EntriesFound}.
+run_queries(QueryFun, QueryCount, EntriesFound, TargetEntries) ->
+    case EntriesFound >= TargetEntries of
+        true ->
+            {QueryCount, EntriesFound};
+        false ->
+            Found = QueryFun(),
+            run_queries(
+                QueryFun, QueryCount + 1, EntriesFound + Found, TargetEntries)
+    end.

+%% Build the zero-arity fun to be profiled for one profile item.
+%% `false` yields a no-op. Any item not matched explicitly is passed on as
+%% the fold type for counter/2, which is then run ten times.
+profile_fun(false, _ProfileData) ->
+    fun() -> ok end;
+profile_fun(
+        Item,
+        {Bookie, Bucket, KeyCount, ObjSize, IndexCount, IndexesReturned}) ->
+    case Item of
+        {mini_query, MiniSize} ->
+            fun() ->
+                random_queries(
+                    Bookie, Bucket, 10, IndexCount, MiniSize,
+                    IndexesReturned div 8)
+            end;
+        {query, RangeSize} ->
+            fun() ->
+                random_queries(
+                    Bookie, Bucket, 10, IndexCount, RangeSize,
+                    IndexesReturned)
+            end;
+        {head, HeadFetches} ->
+            fun() ->
+                random_fetches(head, Bookie, Bucket, KeyCount, HeadFetches)
+            end;
+        {get, GetFetches} ->
+            fun() ->
+                random_fetches(get, Bookie, Bucket, KeyCount, GetFetches)
+            end;
+        {load, IndexGenFun} ->
+            %% The extra chunk (number 11) is generated up front so that
+            %% only the load itself runs inside the profiled fun.
+            ExtraChunk =
+                generate_chunk(
+                    KeyCount div 10, ObjSize, IndexGenFun, Bucket, 11),
+            fun() ->
+                testutil:riakload(Bookie, ExtraChunk)
+            end;
+        update ->
+            fun() ->
+                rotate_chunk(Bookie, <<"ProfileB">>, KeyCount div 50, ObjSize)
+            end;
+        FoldType ->
+            fun() ->
+                lists:foreach(
+                    fun(_I) ->
+                        _ = counter(Bookie, FoldType)
+                    end,
+                    lists:seq(1, 10)
+                )
+            end
+    end.
diff --git a/test/end_to_end/testutil.erl b/test/end_to_end/testutil.erl index 84774fad..f8d786a1 100644 --- a/test/end_to_end/testutil.erl +++ b/test/end_to_end/testutil.erl @@ -44,11 +44,13 @@ update_some_objects/3, delete_some_objects/3, put_indexed_objects/3, + put_indexed_objects/4, put_altered_indexed_objects/3, put_altered_indexed_objects/4, put_altered_indexed_objects/5, check_indexed_objects/4, rotating_object_check/3, + rotation_withnocheck/6, corrupt_journal/5, restore_file/2, restore_topending/2, @@ -754,6 +756,9 @@ check_indexed_objects(Book, B, KSpecL, V) -> put_indexed_objects(Book, Bucket, Count) -> V = get_compressiblevalue(), + put_indexed_objects(Book, Bucket, Count, V). + +put_indexed_objects(Book, Bucket, Count, V) -> IndexGen = get_randomindexes_generator(1), SW = os:timestamp(), ObjL1 = @@ -837,6 +842,12 @@ rotating_object_check(RootPath, B, NumberOfObjects) -> ok = leveled_bookie:book_close(Book2), ok. +rotation_withnocheck(Book1, B, NumberOfObjects, V1, V2, V3) -> + {KSpcL1, _V1} = put_indexed_objects(Book1, B, NumberOfObjects, V1), + {KSpcL2, _V2} = put_altered_indexed_objects(Book1, B, KSpcL1, true, V2), + {_KSpcL3, _V3} = put_altered_indexed_objects(Book1, B, KSpcL2, true, V3), + ok. + corrupt_journal(RootPath, FileName, Corruptions, BasePosition, GapSize) -> OriginalPath = RootPath ++ "/journal/journal_files/" ++ FileName, BackupPath = RootPath ++ "/journal/journal_files/" ++